12 #ifndef EIGENRAND_PACKET_RANDOM_ENGINE_H
13 #define EIGENRAND_PACKET_RANDOM_ENGINE_H
17 #include <type_traits>
26 struct IsIntPacket : std::false_type {};
31 #ifdef EIGEN_VECTORIZE_AVX2
33 struct IsIntPacket<Packet8i> : std::true_type {};
36 struct HalfPacket<Packet8i>
38 using type = Packet4i;
41 #ifdef EIGEN_VECTORIZE_SSE2
43 struct IsIntPacket<Packet4i> : std::true_type {};
46 struct HalfPacket<Packet4i>
48 using type = uint64_t;
58 auto test_integral_result_type(
int)->std::integral_constant<bool, std::is_integral<typename T::result_type>::value>;
61 auto test_integral_result_type(...)->std::false_type;
64 auto test_intpacket_result_type(
int)->std::integral_constant<bool, internal::IsIntPacket<typename T::result_type>::value>;
67 auto test_intpacket_result_type(...)->std::false_type;
71 struct IsScalarRandomEngine : decltype(detail::test_integral_result_type<Ty>(0))
76 struct IsPacketRandomEngine : decltype(detail::test_intpacket_result_type<Ty>(0))
80 enum class RandomEngineType
86 struct GetRandomEngineType : std::integral_constant <
88 IsPacketRandomEngine<Ty>::value ? RandomEngineType::packet :
89 (IsScalarRandomEngine<Ty>::value ? RandomEngineType::scalar : RandomEngineType::none)
94 template<
typename Ty,
size_t length,
size_t alignment = 64>
101 for (
size_t i = 0; i < length; ++i)
103 new (&aligned[i]) Ty();
107 AlignedArray(
const AlignedArray& o)
110 for (
size_t i = 0; i < length; ++i)
116 AlignedArray(AlignedArray&& o)
118 std::swap(memory, o.memory);
119 std::swap(aligned, o.aligned);
122 AlignedArray& operator=(
const AlignedArray& o)
124 for (
size_t i = 0; i < length; ++i)
131 AlignedArray& operator=(AlignedArray&& o)
133 std::swap(memory, o.memory);
134 std::swap(aligned, o.aligned);
143 Ty& operator[](
size_t i)
148 const Ty& operator[](
size_t i)
const
163 const Ty* data()
const
171 memory = std::malloc(
sizeof(Ty) * length + alignment);
172 aligned = (Ty*)(((
size_t)memory + alignment) & ~(alignment - 1));
179 for (
size_t i = 0; i < length; ++i)
189 void* memory =
nullptr;
190 Ty* aligned =
nullptr;
193 #ifndef EIGEN_DONT_VECTORIZE
214 template<
typename Packet,
216 int _Rx, uint64_t _Px,
217 int _Ux, uint64_t _Dx,
218 int _Sx, uint64_t _Bx,
219 int _Tx, uint64_t _Cx,
220 int _Lx, uint64_t _Fx>
224 using result_type = Packet;
226 static constexpr
int word_size = 64;
227 static constexpr
int state_size = _Nx;
228 static constexpr
int shift_size = _Mx;
229 static constexpr
int mask_bits = _Rx;
230 static constexpr uint64_t parameter_a = _Px;
231 static constexpr
int output_u = _Ux;
232 static constexpr
int output_s = _Sx;
233 static constexpr uint64_t output_b = _Bx;
234 static constexpr
int output_t = _Tx;
235 static constexpr uint64_t output_c = _Cx;
236 static constexpr
int output_l = _Lx;
238 static constexpr uint64_t default_seed = 5489U;
250 using namespace Eigen::internal;
251 std::array<uint64_t, unpacket_traits<Packet>::size / 2> seeds;
252 for (uint64_t i = 0; i < seeds.size(); ++i)
256 seed(ploadu<Packet>((
int*)seeds.data()));
276 using namespace Eigen::internal;
277 Packet prev = state[0] = x0;
278 for (
int i = 1; i < _Nx; ++i)
280 prev = state[i] = pmuluadd64(pxor(prev, psrl64(prev, word_size - 2)), _Fx, i);
317 else if (2 * _Nx <= stateIdx)
320 using namespace Eigen::internal;
322 Packet res = state[stateIdx++];
323 res = pxor(res, pand(psrl64(res, _Ux), pseti64<Packet>(_Dx)));
324 res = pxor(res, pand(psll64(res, _Sx), pseti64<Packet>(_Bx)));
325 res = pxor(res, pand(psll64(res, _Tx), pseti64<Packet>(_Cx)));
326 res = pxor(res, psrl64(res, _Lx));
337 for (; 0 < num; --num)
343 typename internal::HalfPacket<Packet>::type half()
350 typename internal::HalfPacket<Packet>::type a;
351 internal::split_two(
operator()(), a, cache);
360 using namespace Eigen::internal;
362 auto hmask = pseti64<Packet>(_hMask),
363 lmask = pseti64<Packet>(_lMask),
364 px = pseti64<Packet>(_Px),
365 one = pseti64<Packet>(1);
368 for (i = 0; i < _Nx - _Mx; ++i)
370 Packet tmp = por(pand(state[i + _Nx], hmask),
371 pand(state[i + _Nx + 1], lmask));
373 state[i] = pxor(pxor(
375 pand(pcmpeq64(pand(tmp, one), one), px)),
380 for (; i < _Nx - 1; ++i)
382 Packet tmp = por(pand(state[i + _Nx], hmask),
383 pand(state[i + _Nx + 1], lmask));
385 state[i] = pxor(pxor(
387 pand(pcmpeq64(pand(tmp, one), one), px)),
392 Packet tmp = por(pand(state[i + _Nx], hmask),
393 pand(state[0], lmask));
394 state[i] = pxor(pxor(
396 pand(pcmpeq64(pand(tmp, one), one), px)),
404 using namespace Eigen::internal;
406 auto hmask = pseti64<Packet>(_hMask),
407 lmask = pseti64<Packet>(_lMask),
408 px = pseti64<Packet>(_Px),
409 one = pseti64<Packet>(1);
411 for (
int i = _Nx; i < 2 * _Nx; ++i)
413 Packet tmp = por(pand(state[i - _Nx], hmask),
414 pand(state[i - _Nx + 1], lmask));
416 state[i] = pxor(pxor(
418 pand(pcmpeq64(pand(tmp, one), one), px)),
424 AlignedArray<Packet, _Nx * 2> state;
426 typename internal::HalfPacket<Packet>::type cache;
429 static constexpr uint64_t _wMask = (uint64_t)-1;
430 static constexpr uint64_t _hMask = (_wMask << _Rx) & _wMask;
431 static constexpr uint64_t _lMask = ~_hMask & _wMask;
439 template<
typename Packet>
441 0xb5026f5aa96619e9, 29,
442 0x5555555555555555, 17,
443 0x71d67fffeda60000, 37,
444 0xfff7eee000000000, 43, 6364136223846793005>;
447 template<
typename UIntType,
typename BaseRng,
int numU64>
448 class ParallelRandomEngineAdaptor
450 static_assert(GetRandomEngineType<BaseRng>::value != RandomEngineType::none,
"BaseRng must be a kind of Random Engine.");
452 using result_type = UIntType;
454 ParallelRandomEngineAdaptor(
size_t seed = BaseRng::default_seed)
456 for (
int i = 0; i < num_parallel; ++i)
459 new (&rngs[i]) BaseRng{
seed + i * u64_stride };
463 ParallelRandomEngineAdaptor(
const BaseRng& o)
465 for (
int i = 0; i < num_parallel; ++i)
468 new (&rngs[i]) BaseRng{ o };
472 ParallelRandomEngineAdaptor(
const ParallelRandomEngineAdaptor&) =
default;
473 ParallelRandomEngineAdaptor(ParallelRandomEngineAdaptor&&) =
default;
475 static constexpr result_type
min()
477 return std::numeric_limits<result_type>::min();
480 static constexpr result_type
max()
482 return std::numeric_limits<result_type>::max();
496 if (fcnt >= fbuf_size)
507 for (
size_t i = 0; i < num_parallel; ++i)
509 reinterpret_cast<typename BaseRng::result_type&
>(buf[i * result_type_stride]) = rngs[i]();
513 void refill_fbuffer()
516 for (
size_t i = 0; i < num_parallel; ++i)
518 auto urf = internal::bit_to_ur_float(rngs[i]());
519 reinterpret_cast<decltype(urf)&
>(fbuf[i * u64_stride * 2]) = urf;
523 static constexpr
int u64_stride =
sizeof(
typename BaseRng::result_type) /
sizeof(uint64_t);
524 static constexpr
int result_type_stride =
sizeof(
typename BaseRng::result_type) /
sizeof(result_type);
525 static constexpr
int num_parallel = numU64 / u64_stride;
526 static constexpr
int byte_size =
sizeof(uint64_t) * numU64;
527 static constexpr
size_t buf_size = byte_size /
sizeof(result_type);
528 static constexpr
size_t fbuf_size = byte_size /
sizeof(float);
530 std::array<BaseRng, num_parallel> rngs;
531 AlignedArray<result_type, buf_size> buf;
532 AlignedArray<float, fbuf_size> fbuf;
533 size_t cnt = buf_size, fcnt = fbuf_size;
542 template<
typename UIntType,
typename BaseRng>
544 sizeof(
typename BaseRng::result_type) /
sizeof(uint64_t)>;
546 template<
typename BaseRng>
547 class RandomEngineWrapper :
public BaseRng
550 using BaseRng::BaseRng;
552 RandomEngineWrapper(
const BaseRng& o) : BaseRng{ o }
556 RandomEngineWrapper(BaseRng&& o) : BaseRng{ o }
560 RandomEngineWrapper(
size_t seed) : BaseRng{
seed }
564 RandomEngineWrapper() =
default;
565 RandomEngineWrapper(
const RandomEngineWrapper&) =
default;
566 RandomEngineWrapper(RandomEngineWrapper&&) =
default;
570 internal::bit_scalar<float> bs;
571 return bs.to_ur(this->
operator()());
575 template<
typename UIntType,
typename Rng>
576 using UniversalRandomEngine =
typename std::conditional<
577 IsPacketRandomEngine<typename std::remove_reference<Rng>::type>::value,
578 PacketRandomEngineAdaptor<UIntType, typename std::remove_reference<Rng>::type>,
579 typename std::conditional<
580 IsScalarRandomEngine<typename std::remove_reference<Rng>::type>::value,
581 RandomEngineWrapper<typename std::remove_reference<Rng>::type>,
594 template<
typename UIntType,
typename Rng>
597 return { std::forward<Rng>(rng) };
600 #ifdef EIGEN_VECTORIZE_AVX2
601 using Vmt19937_64 = Pmt19937_64<internal::Packet8i>;
602 #elif defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_SSE2)
603 using Vmt19937_64 = Pmt19937_64<internal::Packet4i>;
619 template<
typename UIntType = u
int64_t>
620 using P8_mt19937_64 = ParallelRandomEngineAdaptor<UIntType, Vmt19937_64, 8>;