12 #ifndef EIGENRAND_RAND_UTILS_H
13 #define EIGENRAND_RAND_UTILS_H
23 template<
typename IntPacket>
24 EIGEN_STRONG_INLINE
auto bit_to_ur_float(
const IntPacket& x) -> decltype(reinterpret_to_float(x))
26 using FloatPacket = decltype(reinterpret_to_float(x));
28 const IntPacket lower = pset1<IntPacket>(0x7FFFFF),
29 upper = pset1<IntPacket>(127 << 23);
30 const FloatPacket one = pset1<FloatPacket>(1);
32 return psub(reinterpret_to_float(por(pand(x, lower), upper)), one);
35 template<
typename IntPacket>
36 EIGEN_STRONG_INLINE
auto bit_to_ur_double(
const IntPacket& x) -> decltype(reinterpret_to_double(x))
38 using DoublePacket = decltype(reinterpret_to_double(x));
40 const IntPacket lower = pseti64<IntPacket>(0xFFFFFFFFFFFFFull),
41 upper = pseti64<IntPacket>(1023ull << 52);
42 const DoublePacket one = pset1<DoublePacket>(1);
44 return psub(reinterpret_to_double(por(pand(x, lower), upper)), one);
47 template<
typename Scalar>
51 struct bit_scalar<float>
53 float to_ur(uint32_t x)
60 u = (x & 0x7FFFFF) | (127 << 23);
64 float to_nzur(uint32_t x)
66 return to_ur(x) + std::numeric_limits<float>::epsilon() / 8;
71 struct bit_scalar<double>
73 double to_ur(uint64_t x)
80 u = (x & 0xFFFFFFFFFFFFFull) | (1023ull << 52);
84 double to_nzur(uint64_t x)
86 return to_ur(x) + std::numeric_limits<double>::epsilon() / 8;
90 template<
typename Packet,
typename Rng,
91 typename RngResult =
typename std::remove_reference<Rng>::type::result_type,
92 Rand::RandomEngineType reType = Rand::GetRandomEngineType<
93 typename std::remove_reference<Rng>::type
97 template<
typename PacketType,
typename Rng>
98 struct UniformRealUtils;
100 template<
typename PacketType,
typename Rng>
101 struct RandUtils :
public UniformRealUtils<PacketType, Rng>
103 EIGEN_STRONG_INLINE PacketType
balanced(Rng& rng)
105 return psub(pmul(this->zero_to_one(rng), pset1<PacketType>(2)), pset1<PacketType>(1));
108 EIGEN_STRONG_INLINE PacketType nonzero_uniform_real(Rng& rng)
110 constexpr
auto epsilon = std::numeric_limits<typename unpacket_traits<PacketType>::type>::epsilon() / 8;
111 return padd(this->uniform_real(rng), pset1<PacketType>(epsilon));
115 EIGEN_STRONG_INLINE uint32_t collect_upper8bits(uint32_t a, uint32_t b, uint32_t c)
117 return ((a & 0xFF000000) >> 24) | ((b & 0xFF000000) >> 16) | ((c & 0xFF000000) >> 8);
122 #ifdef EIGEN_VECTORIZE_AVX
123 #include <immintrin.h>
129 template<
typename Rng>
130 struct RawbitsMaker<Packet4i, Rng, Packet8i, Rand::RandomEngineType::packet>
132 EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
137 EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
142 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
148 template<
typename Rng>
149 struct RawbitsMaker<Packet8i, Rng, Packet4i, Rand::RandomEngineType::packet>
151 EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
153 return _mm256_insertf128_si256(_mm256_castsi128_si256(rng()), rng(), 1);
156 EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
158 return _mm256_insertf128_si256(_mm256_castsi128_si256(rng()), rng(), 1);
161 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
167 template<
typename Rng,
typename RngResult>
168 struct RawbitsMaker<Packet8i, Rng, RngResult, Rand::RandomEngineType::scalar>
170 EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
172 if (
sizeof(decltype(rng())) == 8)
174 return _mm256_set_epi64x(rng(), rng(), rng(), rng());
178 return _mm256_set_epi32(rng(), rng(), rng(), rng(),
179 rng(), rng(), rng(), rng());
183 EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
186 if (
sizeof(decltype(rng())) == 8)
188 #ifdef EIGEN_VECTORIZE_AVX2
189 p = _mm256_setr_epi64x(rng(), rng(), rng(), 0);
190 p = _mm256_permutevar8x32_epi32(p, _mm256_setr_epi32(0, 1, 2, 7, 3, 4, 5, 7));
191 p = _mm256_shuffle_epi8(p, _mm256_setr_epi8(
204 p = _mm256_setr_epi64x(rng(), v, rng(), v >> 32);
205 Packet4i p1, p2, o = _mm_setr_epi8(
210 split_two(p, p1, p2);
211 p = combine_two(_mm_shuffle_epi8(p1, o), _mm_shuffle_epi8(p2, o));
216 p = _mm256_setr_epi32(rng(), rng(), rng(), 0, rng(), rng(), rng(), 0);
217 #ifdef EIGEN_VECTORIZE_AVX2
218 p = _mm256_shuffle_epi8(p, _mm256_setr_epi8(
229 Packet4i p1, p2, o = _mm_setr_epi8(
234 split_two(p, p1, p2);
235 p = combine_two(_mm_shuffle_epi8(p1, o), _mm_shuffle_epi8(p2, o));
241 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
243 if (
sizeof(decltype(rng())) == 8)
245 return _mm_set_epi64x(rng(), rng());
249 return _mm_set_epi32(rng(), rng(), rng(), rng());
254 template<
typename Rng>
255 struct RawbitsMaker<Packet8i, Rng, Packet8i, Rand::RandomEngineType::packet>
257 EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
262 EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
267 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
273 #ifndef EIGEN_VECTORIZE_AVX2
275 EIGEN_STRONG_INLINE Packet8f bit_to_ur_float<Packet8i>(
const Packet8i& x)
277 const Packet4i lower = pset1<Packet4i>(0x7FFFFF),
278 upper = pset1<Packet4i>(127 << 23);
279 const Packet8f one = pset1<Packet8f>(1);
282 split_two(x, x1, x2);
284 return psub(reinterpret_to_float(
285 combine_two(por(pand(x1, lower), upper), por(pand(x2, lower), upper)
290 EIGEN_STRONG_INLINE Packet4d bit_to_ur_double<Packet8i>(
const Packet8i& x)
292 const Packet4i lower = pseti64<Packet4i>(0xFFFFFFFFFFFFFull),
293 upper = pseti64<Packet4i>(1023ull << 52);
294 const Packet4d one = pset1<Packet4d>(1);
297 split_two(x, x1, x2);
299 return psub(reinterpret_to_double(
300 combine_two(por(pand(x1, lower), upper), por(pand(x2, lower), upper)
305 template<
typename Rng>
306 struct UniformRealUtils<Packet8f, Rng> :
public RawbitsMaker<Packet8i, Rng>
308 EIGEN_STRONG_INLINE Packet8f zero_to_one(Rng& rng)
310 return pdiv(_mm256_cvtepi32_ps(pand(this->rawbits(rng), pset1<Packet8i>(0x7FFFFFFF))),
311 pset1<Packet8f>(0x7FFFFFFF));
314 EIGEN_STRONG_INLINE Packet8f uniform_real(Rng& rng)
316 return bit_to_ur_float(this->rawbits_34(rng));
320 template<
typename Rng>
321 struct UniformRealUtils<Packet4d, Rng> :
public RawbitsMaker<Packet8i, Rng>
323 EIGEN_STRONG_INLINE Packet4d zero_to_one(Rng& rng)
325 return pdiv(_mm256_cvtepi32_pd(pand(this->rawbits_half(rng), pset1<Packet4i>(0x7FFFFFFF))),
326 pset1<Packet4d>(0x7FFFFFFF));
329 EIGEN_STRONG_INLINE Packet4d uniform_real(Rng& rng)
331 return bit_to_ur_double(this->rawbits(rng));
338 #ifdef EIGEN_VECTORIZE_SSE2
339 #include <xmmintrin.h>
345 template<
typename Rng,
typename RngResult>
346 struct RawbitsMaker<Packet4i, Rng, RngResult, Rand::RandomEngineType::scalar>
348 EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
350 if (
sizeof(RngResult) == 8)
352 return _mm_set_epi64x(rng(), rng());
356 return _mm_set_epi32(rng(), rng(), rng(), rng());
360 EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
362 if (
sizeof(RngResult) == 8)
364 return _mm_set_epi64x(rng(), rng());
368 #ifdef EIGEN_VECTORIZE_SSSE3
369 Packet4i p = _mm_setr_epi32(rng(), rng(), rng(), 0);
370 return _mm_shuffle_epi8(p, _mm_setr_epi8(
376 return _mm_set_epi32(rng(), rng(), rng(), rng());
381 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
383 if (
sizeof(decltype(rng())) == 8)
385 return _mm_set_epi64x(0, rng());
389 return _mm_setr_epi32(rng(), rng(), 0, 0);
394 template<
typename Rng>
395 struct RawbitsMaker<Packet4i, Rng, Packet4i, Rand::RandomEngineType::packet>
397 EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
402 EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
407 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
413 template<
typename Rng>
414 struct UniformRealUtils<Packet4f, Rng> :
public RawbitsMaker<Packet4i, Rng>
416 EIGEN_STRONG_INLINE Packet4f zero_to_one(Rng& rng)
418 return pdiv(_mm_cvtepi32_ps(pand(this->rawbits(rng), pset1<Packet4i>(0x7FFFFFFF))),
419 pset1<Packet4f>(0x7FFFFFFF));
422 EIGEN_STRONG_INLINE Packet4f uniform_real(Rng& rng)
424 return bit_to_ur_float(this->rawbits_34(rng));
428 template<
typename Rng>
429 struct UniformRealUtils<Packet2d, Rng> :
public RawbitsMaker<Packet4i, Rng>
431 EIGEN_STRONG_INLINE Packet2d zero_to_one(Rng& rng)
433 return pdiv(_mm_cvtepi32_pd(pand(this->rawbits_half(rng), pset1<Packet4i>(0x7FFFFFFF))),
434 pset1<Packet2d>(0x7FFFFFFF));
437 EIGEN_STRONG_INLINE Packet2d uniform_real(Rng& rng)
439 return bit_to_ur_double(this->rawbits(rng));
450 template<
typename Scalar,
typename Rng>
451 struct scalar_base_rng
454 Rand::IsScalarRandomEngine<
455 typename std::remove_reference<Rng>::type
457 Rand::IsPacketRandomEngine<
458 typename std::remove_reference<Rng>::type
459 >::value,
"Rng must satisfy RandomNumberEngine");
463 scalar_base_rng(
const Rng& _rng) : rng{ _rng }
467 scalar_base_rng(
const scalar_base_rng& o)
472 scalar_base_rng(scalar_base_rng&& o)
473 : rng{ std::move(o.rng) }