12#ifndef EIGENRAND_PACKET_RANDOM_ENGINE_H
13#define EIGENRAND_PACKET_RANDOM_ENGINE_H
28 auto test_integral_result_type(
int)->std::integral_constant<bool, std::is_integral<typename T::result_type>::value && !(T::min() == 0 && (T::max() & T::max() + 1) == 0)>;
31 auto test_integral_result_type(...)->std::false_type;
34 auto test_integral_fullbit_result_type(
int)->std::integral_constant<bool, std::is_integral<typename T::result_type>::value&& T::min() == 0 && (T::max() & T::max() + 1) == 0>;
37 auto test_integral_fullbit_result_type(...)->std::false_type;
40 auto test_intpacket_result_type(
int)->std::integral_constant<bool, internal::IsIntPacket<typename T::result_type>::value>;
43 auto test_intpacket_result_type(...)->std::false_type;
47 struct IsScalarRandomEngine : decltype(detail::test_integral_result_type<Ty>(0))
52 struct IsScalarFullBitRandomEngine : decltype(detail::test_integral_fullbit_result_type<Ty>(0))
57 struct IsPacketRandomEngine : decltype(detail::test_intpacket_result_type<Ty>(0))
61 enum class RandomEngineType
63 none, scalar, scalar_fullbit, packet
67 struct GetRandomEngineType : std::integral_constant <
69 IsPacketRandomEngine<Ty>::value ? RandomEngineType::packet :
70 IsScalarFullBitRandomEngine<Ty>::value ? RandomEngineType::scalar_fullbit :
71 IsScalarRandomEngine<Ty>::value ? RandomEngineType::scalar : RandomEngineType::none
76 template<
typename Ty,
size_t length,
size_t alignment = 64>
83 for (
size_t i = 0; i < length; ++i)
85 new (&aligned[i]) Ty();
89 AlignedArray(
const AlignedArray& o)
92 for (
size_t i = 0; i < length; ++i)
98 AlignedArray(AlignedArray&& o)
100 std::swap(memory, o.memory);
101 std::swap(aligned, o.aligned);
104 AlignedArray& operator=(
const AlignedArray& o)
106 for (
size_t i = 0; i < length; ++i)
113 AlignedArray& operator=(AlignedArray&& o)
115 std::swap(memory, o.memory);
116 std::swap(aligned, o.aligned);
125 Ty& operator[](
size_t i)
130 const Ty& operator[](
size_t i)
const
145 const Ty* data()
const
153 memory = std::malloc(
sizeof(Ty) * length + alignment);
154 aligned = (Ty*)(((
size_t)memory + alignment) & ~(alignment - 1));
161 for (
size_t i = 0; i < length; ++i)
171 void* memory =
nullptr;
172 Ty* aligned =
nullptr;
175#ifndef EIGEN_DONT_VECTORIZE
196 template<
typename Packet,
198 int _Rx, uint64_t _Px,
199 int _Ux, uint64_t _Dx,
200 int _Sx, uint64_t _Bx,
201 int _Tx, uint64_t _Cx,
202 int _Lx, uint64_t _Fx>
206 using result_type = Packet;
208 static constexpr int word_size = 64;
209 static constexpr int state_size = _Nx;
210 static constexpr int shift_size = _Mx;
211 static constexpr int mask_bits = _Rx;
212 static constexpr uint64_t parameter_a = _Px;
213 static constexpr int output_u = _Ux;
214 static constexpr int output_s = _Sx;
215 static constexpr uint64_t output_b = _Bx;
216 static constexpr int output_t = _Tx;
217 static constexpr uint64_t output_c = _Cx;
218 static constexpr int output_l = _Lx;
220 static constexpr uint64_t default_seed = 5489U;
232 using namespace Eigen::internal;
233 std::array<uint64_t, unpacket_traits<Packet>::size / 2> seeds;
234 for (uint64_t i = 0; i < seeds.size(); ++i)
238 seed(ploadu<Packet>((
int*)seeds.data()));
258 using namespace Eigen::internal;
259 Packet prev = state[0] = x0;
260 for (
int i = 1; i < _Nx; ++i)
262 prev = state[i] = pmuluadd64(pxor(prev, psrl64<word_size - 2>(prev)), _Fx, i);
272 static constexpr uint64_t
min()
282 static constexpr uint64_t
max()
299 else if (2 * _Nx <= stateIdx)
302 using namespace Eigen::internal;
304 Packet res = state[stateIdx++];
305 res = pxor(res, pand(psrl64<_Ux>(res), pseti64<Packet>(_Dx)));
306 res = pxor(res, pand(psll64<_Sx>(res), pseti64<Packet>(_Bx)));
307 res = pxor(res, pand(psll64<_Tx>(res), pseti64<Packet>(_Cx)));
308 res = pxor(res, psrl64<_Lx>(res));
319 for (; 0 < num; --num)
325 typename internal::HalfPacket<Packet>::type half()
332 typename internal::HalfPacket<Packet>::type a;
333 internal::split_two(
operator()(), a, cache);
342 using namespace Eigen::internal;
344 auto hmask = pseti64<Packet>(_hMask),
345 lmask = pseti64<Packet>(_lMask),
346 px = pseti64<Packet>(_Px),
347 one = pseti64<Packet>(1);
350 for (i = 0; i < _Nx - _Mx; ++i)
352 Packet tmp = por(pand(state[i + _Nx], hmask),
353 pand(state[i + _Nx + 1], lmask));
355 state[i] = pxor(pxor(
357 pand(pcmpeq64(pand(tmp, one), one), px)),
362 for (; i < _Nx - 1; ++i)
364 Packet tmp = por(pand(state[i + _Nx], hmask),
365 pand(state[i + _Nx + 1], lmask));
367 state[i] = pxor(pxor(
369 pand(pcmpeq64(pand(tmp, one), one), px)),
374 Packet tmp = por(pand(state[i + _Nx], hmask),
375 pand(state[0], lmask));
376 state[i] = pxor(pxor(
378 pand(pcmpeq64(pand(tmp, one), one), px)),
386 using namespace Eigen::internal;
388 auto hmask = pseti64<Packet>(_hMask),
389 lmask = pseti64<Packet>(_lMask),
390 px = pseti64<Packet>(_Px),
391 one = pseti64<Packet>(1);
393 for (
int i = _Nx; i < 2 * _Nx; ++i)
395 Packet tmp = por(pand(state[i - _Nx], hmask),
396 pand(state[i - _Nx + 1], lmask));
398 state[i] = pxor(pxor(
400 pand(pcmpeq64(pand(tmp, one), one), px)),
406 AlignedArray<Packet, _Nx * 2> state;
408 typename internal::HalfPacket<Packet>::type cache;
411 static constexpr uint64_t _wMask = (uint64_t)-1;
412 static constexpr uint64_t _hMask = (_wMask << _Rx) & _wMask;
413 static constexpr uint64_t _lMask = ~_hMask & _wMask;
421 template<
typename Packet>
423 0xb5026f5aa96619e9, 29,
424 0x5555555555555555, 17,
425 0x71d67fffeda60000, 37,
426 0xfff7eee000000000, 43, 6364136223846793005>;
429 template<
typename UIntType,
typename BaseRng,
int numU64>
430 class ParallelRandomEngineAdaptor
432 static_assert(GetRandomEngineType<BaseRng>::value != RandomEngineType::none,
"BaseRng must be a kind of Random Engine.");
433 static_assert(GetRandomEngineType<BaseRng>::value != RandomEngineType::scalar,
"BaseRng must be a kind of mersenne_twister_engine.");
435 using result_type = UIntType;
437 ParallelRandomEngineAdaptor(
size_t seed = BaseRng::default_seed)
439 for (
int i = 0; i < num_parallel; ++i)
442 new (&rngs[i]) BaseRng{ seed + i * u64_stride };
446 ParallelRandomEngineAdaptor(
const BaseRng& o)
448 for (
int i = 0; i < num_parallel; ++i)
451 new (&rngs[i]) BaseRng{ o };
455 ParallelRandomEngineAdaptor(
const ParallelRandomEngineAdaptor&) =
default;
456 ParallelRandomEngineAdaptor(ParallelRandomEngineAdaptor&&) =
default;
458 static constexpr result_type min()
460 return std::numeric_limits<result_type>::min();
463 static constexpr result_type max()
465 return std::numeric_limits<result_type>::max();
468 result_type operator()()
479 if (fcnt >= fbuf_size)
490 for (
size_t i = 0; i < num_parallel; ++i)
492 reinterpret_cast<typename BaseRng::result_type&
>(buf[i * result_type_stride]) = rngs[i]();
496 void refill_fbuffer()
499 for (
size_t i = 0; i < num_parallel; ++i)
501 auto urf = internal::bit_to_ur_float(rngs[i]());
502 reinterpret_cast<decltype(urf)&
>(fbuf[i * u64_stride * 2]) = urf;
506 static constexpr int u64_stride =
sizeof(
typename BaseRng::result_type) /
sizeof(uint64_t);
507 static constexpr int result_type_stride =
sizeof(
typename BaseRng::result_type) /
sizeof(result_type);
508 static constexpr int num_parallel = numU64 / u64_stride;
509 static constexpr int byte_size =
sizeof(uint64_t) * numU64;
510 static constexpr size_t buf_size = byte_size /
sizeof(result_type);
511 static constexpr size_t fbuf_size = byte_size /
sizeof(float);
513 std::array<BaseRng, num_parallel> rngs;
514 AlignedArray<result_type, buf_size> buf;
515 AlignedArray<float, fbuf_size> fbuf;
516 size_t cnt = buf_size, fcnt = fbuf_size;
525 template<
typename UIntType,
typename BaseRng>
527 sizeof(
typename BaseRng::result_type) /
sizeof(uint64_t)>;
529 template<
typename BaseRng>
530 class RandomEngineWrapper :
public BaseRng
533 using BaseRng::BaseRng;
535 RandomEngineWrapper(
const BaseRng& o) : BaseRng{ o }
539 RandomEngineWrapper(BaseRng&& o) : BaseRng{ o }
543 RandomEngineWrapper(
size_t seed) : BaseRng{ seed }
547 RandomEngineWrapper() =
default;
548 RandomEngineWrapper(
const RandomEngineWrapper&) =
default;
549 RandomEngineWrapper(RandomEngineWrapper&&) =
default;
553 internal::BitScalar<float> bs;
554 return bs.to_ur(this->
operator()());
558 template<
typename UIntType,
typename Rng>
559 using UniversalRandomEngine =
typename std::conditional<
560 IsPacketRandomEngine<typename std::remove_reference<Rng>::type>::value,
561 PacketRandomEngineAdaptor<UIntType, typename std::remove_reference<Rng>::type>,
562 typename std::conditional<
563 IsScalarFullBitRandomEngine<typename std::remove_reference<Rng>::type>::value,
564 RandomEngineWrapper<typename std::remove_reference<Rng>::type>,
577 template<
typename UIntType,
typename Rng>
580 static_assert(IsPacketRandomEngine<typename std::remove_reference<Rng>::type>::value || IsScalarFullBitRandomEngine<typename std::remove_reference<Rng>::type>::value,
581 "`Rng` must be a kind of RandomPacketEngine like std::mersenne_twister_engine");
582 return { std::forward<Rng>(rng) };
585#ifdef EIGEN_VECTORIZE_AVX2
586 using Vmt19937_64 = Pmt19937_64<internal::Packet8i>;
587#elif defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_SSE2) || defined(EIGEN_VECTORIZE_NEON)
588 using Vmt19937_64 = Pmt19937_64<internal::Packet4i>;
600 template<
typename UIntType = u
int64_t>
601 using P8_mt19937 = ParallelRandomEngineAdaptor<UIntType, Vmt19937_64, 8>;
609 using P8_mt19937_64_32 = P8_mt19937<uint32_t>;
A vectorized version of Mersenne Twister Engine.
Definition: PacketRandomEngine.h:204
MersenneTwister(Packet x0)
Construct a new Mersenne Twister engine with a packet seed.
Definition: PacketRandomEngine.h:246
void seed(Packet x0)
initialize the engine with a given seed
Definition: PacketRandomEngine.h:256
static constexpr uint64_t max()
maximum value of the result
Definition: PacketRandomEngine.h:282
result_type operator()()
Generates one random packet and advance the internal state.
Definition: PacketRandomEngine.h:295
MersenneTwister(uint64_t x0=default_seed)
Construct a new Mersenne Twister engine with a scalar seed.
Definition: PacketRandomEngine.h:230
static constexpr uint64_t min()
minimum value of the result
Definition: PacketRandomEngine.h:272
void discard(unsigned long long num)
Discards num items being generated.
Definition: PacketRandomEngine.h:317
P8_mt19937< uint64_t > P8_mt19937_64
a vectorized mt19937_64 which generates 8 integers of 64bit simultaneously. It always yields the same...
Definition: PacketRandomEngine.h:607
UniversalRandomEngine< UIntType, Rng > makeUniversalRng(Rng &&rng)
Helper function for making a UniversalRandomEngine.
Definition: PacketRandomEngine.h:578
std::mt19937_64 Vmt19937_64
same as std::mt19937_64 when EIGEN_DONT_VECTORIZE, Pmt19937_64<internal::Packet4i> when SSE2 enabled ...
Definition: PacketRandomEngine.h:598
ParallelRandomEngineAdaptor< UIntType, BaseRng, sizeof(typename BaseRng::result_type)/sizeof(uint64_t)> PacketRandomEngineAdaptor
Scalar adaptor for random engines which generates packet.
Definition: PacketRandomEngine.h:527