12 #ifndef EIGENRAND_RAND_UTILS_H
13 #define EIGENRAND_RAND_UTILS_H
23 template<
typename Packet,
typename Rng,
24 typename RngResult =
typename std::remove_reference<Rng>::type::result_type,
25 Rand::RandomEngineType reType = Rand::GetRandomEngineType<
26 typename std::remove_reference<Rng>::type
30 template<
typename PacketType,
typename Rng>
31 struct UniformRealUtils;
33 template<
typename PacketType,
typename Rng>
34 struct RandUtils :
public UniformRealUtils<PacketType, Rng>
36 EIGEN_STRONG_INLINE PacketType
balanced(Rng& rng)
38 return psub(pmul(this->zero_to_one(rng), pset1<PacketType>(2)), pset1<PacketType>(1));
41 EIGEN_STRONG_INLINE PacketType nonzero_uniform_real(Rng& rng)
43 constexpr
auto epsilon = std::numeric_limits<typename unpacket_traits<PacketType>::type>::epsilon() / 8;
44 return padd(this->uniform_real(rng), pset1<PacketType>(epsilon));
48 EIGEN_STRONG_INLINE uint32_t collect_upper8bits(uint32_t a, uint32_t b, uint32_t c)
50 return ((a & 0xFF000000) >> 24) | ((b & 0xFF000000) >> 16) | ((c & 0xFF000000) >> 8);
55 #ifdef EIGEN_VECTORIZE_AVX
56 #include <immintrin.h>
62 template<
typename Rng>
63 struct RawbitsMaker<Packet4i, Rng, Packet8i, Rand::RandomEngineType::packet>
65 EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
70 EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
75 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
81 template<
typename Rng>
82 struct RawbitsMaker<Packet8i, Rng, Packet4i, Rand::RandomEngineType::packet>
84 EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
86 return _mm256_insertf128_si256(_mm256_castsi128_si256(rng()), rng(), 1);
89 EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
91 return _mm256_insertf128_si256(_mm256_castsi128_si256(rng()), rng(), 1);
94 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
100 template<
typename Rng,
typename RngResult>
101 struct RawbitsMaker<Packet8i, Rng, RngResult, Rand::RandomEngineType::scalar>
103 EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
105 if (
sizeof(decltype(rng())) == 8)
107 return _mm256_set_epi64x(rng(), rng(), rng(), rng());
111 return _mm256_set_epi32(rng(), rng(), rng(), rng(),
112 rng(), rng(), rng(), rng());
116 EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
119 if (
sizeof(decltype(rng())) == 8)
121 #ifdef EIGEN_VECTORIZE_AVX2
122 p = _mm256_setr_epi64x(rng(), rng(), rng(), 0);
123 p = _mm256_permutevar8x32_epi32(p, _mm256_setr_epi32(0, 1, 2, 7, 3, 4, 5, 7));
124 p = _mm256_shuffle_epi8(p, _mm256_setr_epi8(
137 p = _mm256_setr_epi64x(rng(), v, rng(), v >> 32);
138 Packet4i p1, p2, o = _mm_setr_epi8(
143 split_two(p, p1, p2);
144 p = combine_two(_mm_shuffle_epi8(p1, o), _mm_shuffle_epi8(p2, o));
149 p = _mm256_setr_epi32(rng(), rng(), rng(), 0, rng(), rng(), rng(), 0);
150 #ifdef EIGEN_VECTORIZE_AVX2
151 p = _mm256_shuffle_epi8(p, _mm256_setr_epi8(
162 Packet4i p1, p2, o = _mm_setr_epi8(
167 split_two(p, p1, p2);
168 p = combine_two(_mm_shuffle_epi8(p1, o), _mm_shuffle_epi8(p2, o));
174 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
176 if (
sizeof(decltype(rng())) == 8)
178 return _mm_set_epi64x(rng(), rng());
182 return _mm_set_epi32(rng(), rng(), rng(), rng());
187 template<
typename Rng>
188 struct RawbitsMaker<Packet8i, Rng, Packet8i, Rand::RandomEngineType::packet>
190 EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
195 EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
200 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
206 #ifndef EIGEN_VECTORIZE_AVX2
208 EIGEN_STRONG_INLINE Packet8f bit_to_ur_float<Packet8i>(
const Packet8i& x)
210 const Packet4i lower = pset1<Packet4i>(0x7FFFFF),
211 upper = pset1<Packet4i>(127 << 23);
212 const Packet8f one = pset1<Packet8f>(1);
215 split_two(x, x1, x2);
217 return psub(reinterpret_to_float(
218 combine_two(por(pand(x1, lower), upper), por(pand(x2, lower), upper)
223 EIGEN_STRONG_INLINE Packet4d bit_to_ur_double<Packet8i>(
const Packet8i& x)
225 const Packet4i lower = pseti64<Packet4i>(0xFFFFFFFFFFFFFull),
226 upper = pseti64<Packet4i>(1023ull << 52);
227 const Packet4d one = pset1<Packet4d>(1);
230 split_two(x, x1, x2);
232 return psub(reinterpret_to_double(
233 combine_two(por(pand(x1, lower), upper), por(pand(x2, lower), upper)
238 template<
typename Rng>
239 struct UniformRealUtils<Packet8f, Rng> :
public RawbitsMaker<Packet8i, Rng>
241 EIGEN_STRONG_INLINE Packet8f zero_to_one(Rng& rng)
243 return pdiv(_mm256_cvtepi32_ps(pand(this->rawbits(rng), pset1<Packet8i>(0x7FFFFFFF))),
244 pset1<Packet8f>(0x7FFFFFFF));
247 EIGEN_STRONG_INLINE Packet8f uniform_real(Rng& rng)
249 return bit_to_ur_float(this->rawbits_34(rng));
253 template<
typename Rng>
254 struct UniformRealUtils<Packet4d, Rng> :
public RawbitsMaker<Packet8i, Rng>
256 EIGEN_STRONG_INLINE Packet4d zero_to_one(Rng& rng)
258 return pdiv(_mm256_cvtepi32_pd(pand(this->rawbits_half(rng), pset1<Packet4i>(0x7FFFFFFF))),
259 pset1<Packet4d>(0x7FFFFFFF));
262 EIGEN_STRONG_INLINE Packet4d uniform_real(Rng& rng)
264 return bit_to_ur_double(this->rawbits(rng));
271 #ifdef EIGEN_VECTORIZE_SSE2
272 #include <xmmintrin.h>
278 template<
typename Rng,
typename RngResult>
279 struct RawbitsMaker<Packet4i, Rng, RngResult, Rand::RandomEngineType::scalar>
281 EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
283 if (
sizeof(RngResult) == 8)
285 return _mm_set_epi64x(rng(), rng());
289 return _mm_set_epi32(rng(), rng(), rng(), rng());
293 EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
295 if (
sizeof(RngResult) == 8)
297 return _mm_set_epi64x(rng(), rng());
301 #ifdef EIGEN_VECTORIZE_SSSE3
302 Packet4i p = _mm_setr_epi32(rng(), rng(), rng(), 0);
303 return _mm_shuffle_epi8(p, _mm_setr_epi8(
309 return _mm_set_epi32(rng(), rng(), rng(), rng());
314 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
316 if (
sizeof(decltype(rng())) == 8)
318 return _mm_set_epi64x(0, rng());
322 return _mm_setr_epi32(rng(), rng(), 0, 0);
327 template<
typename Rng>
328 struct RawbitsMaker<Packet4i, Rng, Packet4i, Rand::RandomEngineType::packet>
330 EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
335 EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
340 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
346 template<
typename Rng>
347 struct UniformRealUtils<Packet4f, Rng> :
public RawbitsMaker<Packet4i, Rng>
349 EIGEN_STRONG_INLINE Packet4f zero_to_one(Rng& rng)
351 return pdiv(_mm_cvtepi32_ps(pand(this->rawbits(rng), pset1<Packet4i>(0x7FFFFFFF))),
352 pset1<Packet4f>(0x7FFFFFFF));
355 EIGEN_STRONG_INLINE Packet4f uniform_real(Rng& rng)
357 return bit_to_ur_float(this->rawbits_34(rng));
361 template<
typename Rng>
362 struct UniformRealUtils<Packet2d, Rng> :
public RawbitsMaker<Packet4i, Rng>
364 EIGEN_STRONG_INLINE Packet2d zero_to_one(Rng& rng)
366 return pdiv(_mm_cvtepi32_pd(pand(this->rawbits_half(rng), pset1<Packet4i>(0x7FFFFFFF))),
367 pset1<Packet2d>(0x7FFFFFFF));
370 EIGEN_STRONG_INLINE Packet2d uniform_real(Rng& rng)
372 return bit_to_ur_double(this->rawbits(rng));
383 template<
typename Gen,
typename _Scalar,
typename Rng,
bool _mutable = false>
384 struct scalar_rng_adaptor
387 Rand::IsScalarRandomEngine<
388 typename std::remove_reference<Rng>::type
390 Rand::IsPacketRandomEngine<
391 typename std::remove_reference<Rng>::type
392 >::value,
"Rng must satisfy RandomNumberEngine");
397 scalar_rng_adaptor(
const Rng& _rng) : rng{ _rng }
401 template<
typename _Gen>
402 scalar_rng_adaptor(
const Rng& _rng, _Gen&& _gen) : gen{ _gen }, rng{ _rng }
406 scalar_rng_adaptor(
const scalar_rng_adaptor& o) =
default;
407 scalar_rng_adaptor(scalar_rng_adaptor&& o) =
default;
409 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const _Scalar operator() ()
const
414 template<
typename Packet>
415 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Packet packetOp()
const
417 return gen.template packetOp<Packet>(rng);
421 template<
typename Gen,
typename _Scalar,
typename Rng>
422 struct scalar_rng_adaptor<Gen, _Scalar, Rng, true>
425 Rand::IsScalarRandomEngine<
426 typename std::remove_reference<Rng>::type
428 Rand::IsPacketRandomEngine<
429 typename std::remove_reference<Rng>::type
430 >::value,
"Rng must satisfy RandomNumberEngine");
435 scalar_rng_adaptor(
const Rng& _rng) : rng{ _rng }
439 template<
typename _Gen>
440 scalar_rng_adaptor(
const Rng& _rng, _Gen&& _gen) : gen{ _gen }, rng{ _rng }
444 scalar_rng_adaptor(
const scalar_rng_adaptor& o) =
default;
445 scalar_rng_adaptor(scalar_rng_adaptor&& o) =
default;
447 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const _Scalar operator() ()
const
452 template<
typename Packet>
453 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Packet packetOp()
const
455 return gen.template packetOp<Packet>(rng);
459 template<
typename Gen,
typename _Scalar,
typename Urng,
bool _mutable>
460 struct functor_traits<scalar_rng_adaptor<Gen, _Scalar, Urng, _mutable> >
462 enum { Cost = HugeCost, PacketAccess = packet_traits<_Scalar>::Vectorizable, IsRepeatable =
false };