12 #ifndef EIGENRAND_PACKET_RANDOM_ENGINE_H
13 #define EIGENRAND_PACKET_RANDOM_ENGINE_H
17 #include <type_traits>
28 auto test_integral_result_type(
int)->std::integral_constant<bool, std::is_integral<typename T::result_type>::value>;
31 auto test_integral_result_type(...)->std::false_type;
34 auto test_intpacket_result_type(
int)->std::integral_constant<bool, internal::IsIntPacket<typename T::result_type>::value>;
37 auto test_intpacket_result_type(...)->std::false_type;
41 struct IsScalarRandomEngine : decltype(detail::test_integral_result_type<Ty>(0))
46 struct IsPacketRandomEngine : decltype(detail::test_intpacket_result_type<Ty>(0))
50 enum class RandomEngineType
56 struct GetRandomEngineType : std::integral_constant <
58 IsPacketRandomEngine<Ty>::value ? RandomEngineType::packet :
59 (IsScalarRandomEngine<Ty>::value ? RandomEngineType::scalar : RandomEngineType::none)
64 template<
typename Ty,
size_t length,
size_t alignment = 64>
71 for (
size_t i = 0; i < length; ++i)
73 new (&aligned[i]) Ty();
77 AlignedArray(
const AlignedArray& o)
80 for (
size_t i = 0; i < length; ++i)
86 AlignedArray(AlignedArray&& o)
88 std::swap(memory, o.memory);
89 std::swap(aligned, o.aligned);
92 AlignedArray& operator=(
const AlignedArray& o)
94 for (
size_t i = 0; i < length; ++i)
101 AlignedArray& operator=(AlignedArray&& o)
103 std::swap(memory, o.memory);
104 std::swap(aligned, o.aligned);
113 Ty& operator[](
size_t i)
118 const Ty& operator[](
size_t i)
const
133 const Ty* data()
const
141 memory = std::malloc(
sizeof(Ty) * length + alignment);
142 aligned = (Ty*)(((
size_t)memory + alignment) & ~(alignment - 1));
149 for (
size_t i = 0; i < length; ++i)
159 void* memory =
nullptr;
160 Ty* aligned =
nullptr;
163 #ifndef EIGEN_DONT_VECTORIZE
184 template<
typename Packet,
186 int _Rx, uint64_t _Px,
187 int _Ux, uint64_t _Dx,
188 int _Sx, uint64_t _Bx,
189 int _Tx, uint64_t _Cx,
190 int _Lx, uint64_t _Fx>
194 using result_type = Packet;
196 static constexpr
int word_size = 64;
197 static constexpr
int state_size = _Nx;
198 static constexpr
int shift_size = _Mx;
199 static constexpr
int mask_bits = _Rx;
200 static constexpr uint64_t parameter_a = _Px;
201 static constexpr
int output_u = _Ux;
202 static constexpr
int output_s = _Sx;
203 static constexpr uint64_t output_b = _Bx;
204 static constexpr
int output_t = _Tx;
205 static constexpr uint64_t output_c = _Cx;
206 static constexpr
int output_l = _Lx;
208 static constexpr uint64_t default_seed = 5489U;
220 using namespace Eigen::internal;
221 std::array<uint64_t, unpacket_traits<Packet>::size / 2> seeds;
222 for (uint64_t i = 0; i < seeds.size(); ++i)
226 seed(ploadu<Packet>((
int*)seeds.data()));
246 using namespace Eigen::internal;
247 Packet prev = state[0] = x0;
248 for (
int i = 1; i < _Nx; ++i)
250 prev = state[i] = pmuluadd64(pxor(prev, psrl64(prev, word_size - 2)), _Fx, i);
287 else if (2 * _Nx <= stateIdx)
290 using namespace Eigen::internal;
292 Packet res = state[stateIdx++];
293 res = pxor(res, pand(psrl64(res, _Ux), pseti64<Packet>(_Dx)));
294 res = pxor(res, pand(psll64(res, _Sx), pseti64<Packet>(_Bx)));
295 res = pxor(res, pand(psll64(res, _Tx), pseti64<Packet>(_Cx)));
296 res = pxor(res, psrl64(res, _Lx));
307 for (; 0 < num; --num)
313 typename internal::HalfPacket<Packet>::type half()
320 typename internal::HalfPacket<Packet>::type a;
321 internal::split_two(
operator()(), a, cache);
330 using namespace Eigen::internal;
332 auto hmask = pseti64<Packet>(_hMask),
333 lmask = pseti64<Packet>(_lMask),
334 px = pseti64<Packet>(_Px),
335 one = pseti64<Packet>(1);
338 for (i = 0; i < _Nx - _Mx; ++i)
340 Packet tmp = por(pand(state[i + _Nx], hmask),
341 pand(state[i + _Nx + 1], lmask));
343 state[i] = pxor(pxor(
345 pand(pcmpeq64(pand(tmp, one), one), px)),
350 for (; i < _Nx - 1; ++i)
352 Packet tmp = por(pand(state[i + _Nx], hmask),
353 pand(state[i + _Nx + 1], lmask));
355 state[i] = pxor(pxor(
357 pand(pcmpeq64(pand(tmp, one), one), px)),
362 Packet tmp = por(pand(state[i + _Nx], hmask),
363 pand(state[0], lmask));
364 state[i] = pxor(pxor(
366 pand(pcmpeq64(pand(tmp, one), one), px)),
374 using namespace Eigen::internal;
376 auto hmask = pseti64<Packet>(_hMask),
377 lmask = pseti64<Packet>(_lMask),
378 px = pseti64<Packet>(_Px),
379 one = pseti64<Packet>(1);
381 for (
int i = _Nx; i < 2 * _Nx; ++i)
383 Packet tmp = por(pand(state[i - _Nx], hmask),
384 pand(state[i - _Nx + 1], lmask));
386 state[i] = pxor(pxor(
388 pand(pcmpeq64(pand(tmp, one), one), px)),
394 AlignedArray<Packet, _Nx * 2> state;
396 typename internal::HalfPacket<Packet>::type cache;
399 static constexpr uint64_t _wMask = (uint64_t)-1;
400 static constexpr uint64_t _hMask = (_wMask << _Rx) & _wMask;
401 static constexpr uint64_t _lMask = ~_hMask & _wMask;
409 template<
typename Packet>
411 0xb5026f5aa96619e9, 29,
412 0x5555555555555555, 17,
413 0x71d67fffeda60000, 37,
414 0xfff7eee000000000, 43, 6364136223846793005>;
417 template<
typename UIntType,
typename BaseRng,
int numU64>
418 class ParallelRandomEngineAdaptor
420 static_assert(GetRandomEngineType<BaseRng>::value != RandomEngineType::none,
"BaseRng must be a kind of Random Engine.");
422 using result_type = UIntType;
424 ParallelRandomEngineAdaptor(
size_t seed = BaseRng::default_seed)
426 for (
int i = 0; i < num_parallel; ++i)
429 new (&rngs[i]) BaseRng{
seed + i * u64_stride };
433 ParallelRandomEngineAdaptor(
const BaseRng& o)
435 for (
int i = 0; i < num_parallel; ++i)
438 new (&rngs[i]) BaseRng{ o };
442 ParallelRandomEngineAdaptor(
const ParallelRandomEngineAdaptor&) =
default;
443 ParallelRandomEngineAdaptor(ParallelRandomEngineAdaptor&&) =
default;
445 static constexpr result_type
min()
447 return std::numeric_limits<result_type>::min();
450 static constexpr result_type
max()
452 return std::numeric_limits<result_type>::max();
466 if (fcnt >= fbuf_size)
477 for (
size_t i = 0; i < num_parallel; ++i)
479 reinterpret_cast<typename BaseRng::result_type&
>(buf[i * result_type_stride]) = rngs[i]();
483 void refill_fbuffer()
486 for (
size_t i = 0; i < num_parallel; ++i)
488 auto urf = internal::bit_to_ur_float(rngs[i]());
489 reinterpret_cast<decltype(urf)&
>(fbuf[i * u64_stride * 2]) = urf;
493 static constexpr
int u64_stride =
sizeof(
typename BaseRng::result_type) /
sizeof(uint64_t);
494 static constexpr
int result_type_stride =
sizeof(
typename BaseRng::result_type) /
sizeof(result_type);
495 static constexpr
int num_parallel = numU64 / u64_stride;
496 static constexpr
int byte_size =
sizeof(uint64_t) * numU64;
497 static constexpr
size_t buf_size = byte_size /
sizeof(result_type);
498 static constexpr
size_t fbuf_size = byte_size /
sizeof(float);
500 std::array<BaseRng, num_parallel> rngs;
501 AlignedArray<result_type, buf_size> buf;
502 AlignedArray<float, fbuf_size> fbuf;
503 size_t cnt = buf_size, fcnt = fbuf_size;
512 template<
typename UIntType,
typename BaseRng>
514 sizeof(
typename BaseRng::result_type) /
sizeof(uint64_t)>;
516 template<
typename BaseRng>
517 class RandomEngineWrapper :
public BaseRng
520 using BaseRng::BaseRng;
522 RandomEngineWrapper(
const BaseRng& o) : BaseRng{ o }
526 RandomEngineWrapper(BaseRng&& o) : BaseRng{ o }
530 RandomEngineWrapper(
size_t seed) : BaseRng{
seed }
534 RandomEngineWrapper() =
default;
535 RandomEngineWrapper(
const RandomEngineWrapper&) =
default;
536 RandomEngineWrapper(RandomEngineWrapper&&) =
default;
540 internal::bit_scalar<float> bs;
541 return bs.to_ur(this->
operator()());
545 template<
typename UIntType,
typename Rng>
546 using UniversalRandomEngine =
typename std::conditional<
547 IsPacketRandomEngine<typename std::remove_reference<Rng>::type>::value,
548 PacketRandomEngineAdaptor<UIntType, typename std::remove_reference<Rng>::type>,
549 typename std::conditional<
550 IsScalarRandomEngine<typename std::remove_reference<Rng>::type>::value,
551 RandomEngineWrapper<typename std::remove_reference<Rng>::type>,
564 template<
typename UIntType,
typename Rng>
567 return { std::forward<Rng>(rng) };
570 #ifdef EIGEN_VECTORIZE_AVX2
571 using Vmt19937_64 = Pmt19937_64<internal::Packet8i>;
572 #elif defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_SSE2)
573 using Vmt19937_64 = Pmt19937_64<internal::Packet4i>;
589 template<
typename UIntType = u
int64_t>
590 using P8_mt19937_64 = ParallelRandomEngineAdaptor<UIntType, Vmt19937_64, 8>;
A vectorized version of Mersenne Twister Engine.
Definition: PacketRandomEngine.h:192
MersenneTwister(Packet x0)
Construct a new Mersenne Twister engine with a packet seed.
Definition: PacketRandomEngine.h:234
uint64_t max() const
maximum value of the result
Definition: PacketRandomEngine.h:270
void seed(Packet x0)
initialize the engine with a given seed
Definition: PacketRandomEngine.h:244
result_type operator()()
Generates one random packet and advance the internal state.
Definition: PacketRandomEngine.h:283
MersenneTwister(uint64_t x0=default_seed)
Construct a new Mersenne Twister engine with a scalar seed.
Definition: PacketRandomEngine.h:218
uint64_t min() const
minimum value of the result
Definition: PacketRandomEngine.h:260
void discard(unsigned long long num)
Discards num items being generated.
Definition: PacketRandomEngine.h:305
ParallelRandomEngineAdaptor< UIntType, Vmt19937_64, 8 > P8_mt19937_64
a vectorized mt19937_64 which generates 8 integers of 64bit simultaneously. It always yields the same...
Definition: PacketRandomEngine.h:590
UniversalRandomEngine< UIntType, Rng > makeUniversalRng(Rng &&rng)
Helper function for making a UniversalRandomEngine.
Definition: PacketRandomEngine.h:565
std::mt19937_64 Vmt19937_64
same as std::mt19937_64 when EIGEN_DONT_VECTORIZE, Pmt19937_64<internal::Packet4i> when SSE2 enabled ...
Definition: PacketRandomEngine.h:583
ParallelRandomEngineAdaptor< UIntType, BaseRng, sizeof(typename BaseRng::result_type)/sizeof(uint64_t)> PacketRandomEngineAdaptor
Scalar adaptor for random engines which generates packet.
Definition: PacketRandomEngine.h:514