12 #ifndef EIGENRAND_PACKET_RANDOM_ENGINE_H
13 #define EIGENRAND_PACKET_RANDOM_ENGINE_H
17 #include <type_traits>
25 struct IsIntPacket : std::false_type {};
30 #ifdef EIGEN_VECTORIZE_AVX2
32 struct IsIntPacket<Packet8i> : std::true_type {};
35 struct HalfPacket<Packet8i>
37 using type = Packet4i;
40 #ifdef EIGEN_VECTORIZE_SSE2
42 struct IsIntPacket<Packet4i> : std::true_type {};
45 struct HalfPacket<Packet4i>
47 using type = uint64_t;
50 template<
typename Packet>
51 EIGEN_STRONG_INLINE Packet pcmpeq64(
const Packet& a,
const Packet& b);
53 template<
typename Packet>
54 EIGEN_STRONG_INLINE Packet pmuluadd64(
const Packet& a, uint64_t b, uint64_t c);
56 #ifdef EIGEN_VECTORIZE_AVX
58 EIGEN_STRONG_INLINE Packet8i pcmpeq64<Packet8i>(
const Packet8i& a,
const Packet8i& b)
60 #ifdef EIGEN_VECTORIZE_AVX2
61 return _mm256_cmpeq_epi64(a, b);
63 Packet4i a1, a2, b1, b2;
66 return combine_two(_mm_cmpeq_epi64(a1, b1), _mm_cmpeq_epi64(a2, b2));
71 EIGEN_STRONG_INLINE Packet8i pmuluadd64<Packet8i>(
const Packet8i& a, uint64_t b, uint64_t c)
74 _mm256_storeu_si256((__m256i*)u, a);
79 return _mm256_loadu_si256((__m256i*)u);
83 #ifdef EIGEN_VECTORIZE_SSE2
85 EIGEN_STRONG_INLINE Packet4i pcmpeq64<Packet4i>(
const Packet4i& a,
const Packet4i& b)
87 #ifdef EIGEN_VECTORIZE_SSE4_1
88 return _mm_cmpeq_epi64(a, b);
90 Packet4i c = _mm_cmpeq_epi32(a, b);
91 return pand(c, _mm_shuffle_epi32(c, _MM_SHUFFLE(2, 3, 0, 1)));
96 EIGEN_STRONG_INLINE Packet4i pmuluadd64<Packet4i>(
const Packet4i& a, uint64_t b, uint64_t c)
99 _mm_storeu_si128((__m128i*)u, a);
102 return _mm_loadu_si128((__m128i*)u);
113 auto test_integral_result_type(
int) -> std::integral_constant<bool, std::is_integral<typename T::result_type>::value>;
116 auto test_integral_result_type(...) -> std::false_type;
119 auto test_intpacket_result_type(
int)->std::integral_constant<bool, internal::IsIntPacket<typename T::result_type>::value>;
122 auto test_intpacket_result_type(...)->std::false_type;
125 template<
typename Ty>
126 struct IsScalarRandomEngine : decltype(detail::test_integral_result_type<Ty>(0))
130 template<
typename Ty>
131 struct IsPacketRandomEngine : decltype(detail::test_intpacket_result_type<Ty>(0))
135 enum class RandomEngineType
140 template<
typename Ty>
141 struct GetRandomEngineType : std::integral_constant <
143 IsPacketRandomEngine<Ty>::value ? RandomEngineType::packet :
144 (IsScalarRandomEngine<Ty>::value ? RandomEngineType::scalar : RandomEngineType::none)
149 #ifndef EIGEN_DONT_VECTORIZE
170 template<
typename Packet,
172 int _Rx, uint64_t _Px,
173 int _Ux, uint64_t _Dx,
174 int _Sx, uint64_t _Bx,
175 int _Tx, uint64_t _Cx,
176 int _Lx, uint64_t _Fx>
180 using result_type = Packet;
182 static constexpr
int word_size = 64;
183 static constexpr
int state_size = _Nx;
184 static constexpr
int shift_size = _Mx;
185 static constexpr
int mask_bits = _Rx;
186 static constexpr uint64_t parameter_a = _Px;
187 static constexpr
int output_u = _Ux;
188 static constexpr
int output_s = _Sx;
189 static constexpr uint64_t output_b = _Bx;
190 static constexpr
int output_t = _Tx;
191 static constexpr uint64_t output_c = _Cx;
192 static constexpr
int output_l = _Lx;
194 static constexpr uint64_t default_seed = 5489U;
206 using namespace Eigen::internal;
207 std::array<uint64_t, unpacket_traits<Packet>::size / 2> seeds;
208 for (uint64_t i = 0; i < seeds.size(); ++i)
212 seed(ploadu<Packet>((
int*)seeds.data()));
232 using namespace Eigen::internal;
233 Packet prev = state[0] = x0;
234 for (
int i = 1; i < _Nx; ++i)
236 prev = state[i] = pmuluadd64(pxor(prev, psrl64(prev, word_size - 2)), _Fx, i);
273 else if (2 * _Nx <= stateIdx)
276 using namespace Eigen::internal;
278 Packet res = state[stateIdx++];
279 res = pxor(res, pand(psrl64(res, _Ux), pseti64<Packet>(_Dx)));
280 res = pxor(res, pand(psll64(res, _Sx), pseti64<Packet>(_Bx)));
281 res = pxor(res, pand(psll64(res, _Tx), pseti64<Packet>(_Cx)));
282 res = pxor(res, psrl64(res, _Lx));
293 for (; 0 < num; --num)
299 typename internal::HalfPacket<Packet>::type half()
306 typename internal::HalfPacket<Packet>::type a;
307 internal::split_two(
operator()(), a, cache);
316 using namespace Eigen::internal;
318 auto hmask = pseti64<Packet>(_hMask),
319 lmask = pseti64<Packet>(_lMask),
320 px = pseti64<Packet>(_Px),
321 one = pseti64<Packet>(1);
324 for (i = 0; i < _Nx - _Mx; ++i)
326 Packet tmp = por(pand(state[i + _Nx], hmask),
327 pand(state[i + _Nx + 1], lmask));
329 state[i] = pxor(pxor(
331 pand(pcmpeq64(pand(tmp, one), one), px)),
336 for (; i < _Nx - 1; ++i)
338 Packet tmp = por(pand(state[i + _Nx], hmask),
339 pand(state[i + _Nx + 1], lmask));
341 state[i] = pxor(pxor(
343 pand(pcmpeq64(pand(tmp, one), one), px)),
348 Packet tmp = por(pand(state[i + _Nx], hmask),
349 pand(state[0], lmask));
350 state[i] = pxor(pxor(
352 pand(pcmpeq64(pand(tmp, one), one), px)),
360 using namespace Eigen::internal;
362 auto hmask = pseti64<Packet>(_hMask),
363 lmask = pseti64<Packet>(_lMask),
364 px = pseti64<Packet>(_Px),
365 one = pseti64<Packet>(1);
367 for (
int i = _Nx; i < 2 * _Nx; ++i)
369 Packet tmp = por(pand(state[i - _Nx], hmask),
370 pand(state[i - _Nx + 1], lmask));
372 state[i] = pxor(pxor(
374 pand(pcmpeq64(pand(tmp, one), one), px)),
380 std::array<Packet, _Nx * 2> state;
382 typename internal::HalfPacket<Packet>::type cache;
385 static constexpr uint64_t _wMask = (uint64_t)-1;
386 static constexpr uint64_t _hMask = (_wMask << _Rx) & _wMask;
387 static constexpr uint64_t _lMask = ~_hMask & _wMask;
395 template<
typename Packet>
397 0xb5026f5aa96619e9, 29,
398 0x5555555555555555, 17,
399 0x71d67fffeda60000, 37,
400 0xfff7eee000000000, 43, 6364136223846793005>;
409 template<
typename UIntType,
typename BaseRng>
412 static_assert(IsPacketRandomEngine<BaseRng>::value,
"BaseRNG must be a kind of PacketRandomEngine.");
414 using result_type = UIntType;
429 static constexpr result_type
min()
431 return std::numeric_limits<result_type>::min();
434 static constexpr result_type
max()
436 return std::numeric_limits<result_type>::max();
449 static constexpr
size_t buf_size = 64 /
sizeof(result_type);
454 const size_t stride =
sizeof(
typename BaseRng::result_type) /
sizeof(result_type);
455 for (
size_t i = 0; i < buf_size; i += stride)
457 *(
typename BaseRng::result_type*)&buf[i] = rng();
462 std::array<result_type, buf_size> buf;
463 size_t cnt = buf_size;
475 template<
typename UIntType,
typename Rng>
477 IsPacketRandomEngine<typename std::remove_reference<Rng>::type>::value,
481 return { std::forward<Rng>(rng) };
484 template<
typename UIntType,
typename Rng>
486 IsScalarRandomEngine<typename std::remove_reference<Rng>::type>::value,
487 typename std::remove_reference<Rng>::type
490 return std::forward<Rng>(rng);
493 #ifdef EIGEN_VECTORIZE_AVX2
494 using Vmt19937_64 = Pmt19937_64<internal::Packet8i>;
495 #elif defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_SSE2)
496 using Vmt19937_64 = Pmt19937_64<internal::Packet4i>;