EigenRand  0.5.0
 
Loading...
Searching...
No Matches
PacketRandomEngine.h
Go to the documentation of this file.
1
12#ifndef EIGENRAND_PACKET_RANDOM_ENGINE_H
13#define EIGENRAND_PACKET_RANDOM_ENGINE_H
14
15#include <array>
16#include <random>
17#include <type_traits>
18#include "MorePacketMath.h"
19#include <fstream>
20
21namespace Eigen
22{
23 namespace Rand
24 {
25 namespace detail
26 {
27 template<typename T>
28 auto test_integral_result_type(int)->std::integral_constant<bool, std::is_integral<typename T::result_type>::value && !(T::min() == 0 && (T::max() & T::max() + 1) == 0)>;
29
30 template<typename T>
31 auto test_integral_result_type(...)->std::false_type;
32
33 template<typename T>
34 auto test_integral_fullbit_result_type(int)->std::integral_constant<bool, std::is_integral<typename T::result_type>::value&& T::min() == 0 && (T::max() & T::max() + 1) == 0>;
35
36 template<typename T>
37 auto test_integral_fullbit_result_type(...)->std::false_type;
38
39 template<typename T>
40 auto test_intpacket_result_type(int)->std::integral_constant<bool, internal::IsIntPacket<typename T::result_type>::value>;
41
42 template<typename T>
43 auto test_intpacket_result_type(...)->std::false_type;
44 }
45
46 template<typename Ty>
47 struct IsScalarRandomEngine : decltype(detail::test_integral_result_type<Ty>(0))
48 {
49 };
50
51 template<typename Ty>
52 struct IsScalarFullBitRandomEngine : decltype(detail::test_integral_fullbit_result_type<Ty>(0))
53 {
54 };
55
56 template<typename Ty>
57 struct IsPacketRandomEngine : decltype(detail::test_intpacket_result_type<Ty>(0))
58 {
59 };
60
61 enum class RandomEngineType
62 {
63 none, scalar, scalar_fullbit, packet
64 };
65
66 template<typename Ty>
67 struct GetRandomEngineType : std::integral_constant <
68 RandomEngineType,
69 IsPacketRandomEngine<Ty>::value ? RandomEngineType::packet :
70 IsScalarFullBitRandomEngine<Ty>::value ? RandomEngineType::scalar_fullbit :
71 IsScalarRandomEngine<Ty>::value ? RandomEngineType::scalar : RandomEngineType::none
72 >
73 {
74 };
75
76 template<typename Ty, size_t length, size_t alignment = 64>
77 class AlignedArray
78 {
79 public:
80 AlignedArray()
81 {
82 allocate();
83 for (size_t i = 0; i < length; ++i)
84 {
85 new (&aligned[i]) Ty();
86 }
87 }
88
89 AlignedArray(const AlignedArray& o)
90 {
91 allocate();
92 for (size_t i = 0; i < length; ++i)
93 {
94 aligned[i] = o[i];
95 }
96 }
97
98 AlignedArray(AlignedArray&& o)
99 {
100 std::swap(memory, o.memory);
101 std::swap(aligned, o.aligned);
102 }
103
104 AlignedArray& operator=(const AlignedArray& o)
105 {
106 for (size_t i = 0; i < length; ++i)
107 {
108 aligned[i] = o[i];
109 }
110 return *this;
111 }
112
113 AlignedArray& operator=(AlignedArray&& o)
114 {
115 std::swap(memory, o.memory);
116 std::swap(aligned, o.aligned);
117 return *this;
118 }
119
120 ~AlignedArray()
121 {
122 deallocate();
123 }
124
125 Ty& operator[](size_t i)
126 {
127 return aligned[i];
128 }
129
130 const Ty& operator[](size_t i) const
131 {
132 return aligned[i];
133 }
134
135 size_t size() const
136 {
137 return length;
138 }
139
140 Ty* data()
141 {
142 return aligned;
143 }
144
145 const Ty* data() const
146 {
147 return aligned;
148 }
149
150 private:
151 void allocate()
152 {
153 memory = std::malloc(sizeof(Ty) * length + alignment);
154 aligned = (Ty*)(((size_t)memory + alignment) & ~(alignment - 1));
155 }
156
157 void deallocate()
158 {
159 if (memory)
160 {
161 for (size_t i = 0; i < length; ++i)
162 {
163 aligned[i].~Ty();
164 }
165 std::free(memory);
166 memory = nullptr;
167 aligned = nullptr;
168 }
169 }
170
171 void* memory = nullptr;
172 Ty* aligned = nullptr;
173 };
174
175#ifndef EIGEN_DONT_VECTORIZE
196 template<typename Packet,
197 int _Nx, int _Mx,
198 int _Rx, uint64_t _Px,
199 int _Ux, uint64_t _Dx,
200 int _Sx, uint64_t _Bx,
201 int _Tx, uint64_t _Cx,
202 int _Lx, uint64_t _Fx>
204 {
205 public:
206 using result_type = Packet;
207
208 static constexpr int word_size = 64;
209 static constexpr int state_size = _Nx;
210 static constexpr int shift_size = _Mx;
211 static constexpr int mask_bits = _Rx;
212 static constexpr uint64_t parameter_a = _Px;
213 static constexpr int output_u = _Ux;
214 static constexpr int output_s = _Sx;
215 static constexpr uint64_t output_b = _Bx;
216 static constexpr int output_t = _Tx;
217 static constexpr uint64_t output_c = _Cx;
218 static constexpr int output_l = _Lx;
219
220 static constexpr uint64_t default_seed = 5489U;
221
230 MersenneTwister(uint64_t x0 = default_seed)
231 {
232 using namespace Eigen::internal;
233 std::array<uint64_t, unpacket_traits<Packet>::size / 2> seeds;
234 for (uint64_t i = 0; i < seeds.size(); ++i)
235 {
236 seeds[i] = x0 + i;
237 }
238 seed(ploadu<Packet>((int*)seeds.data()));
239 }
240
247 {
248 seed(x0);
249 }
250
256 void seed(Packet x0)
257 {
258 using namespace Eigen::internal;
259 Packet prev = state[0] = x0;
260 for (int i = 1; i < _Nx; ++i)
261 {
262 prev = state[i] = pmuluadd64(pxor(prev, psrl64<word_size - 2>(prev)), _Fx, i);
263 }
264 stateIdx = _Nx;
265 }
266
272 static constexpr uint64_t min()
273 {
274 return 0;
275 }
276
282 static constexpr uint64_t max()
283 {
284 return _wMask;
285 }
286
295 result_type operator()()
296 {
297 if (stateIdx == _Nx)
298 refill_upper();
299 else if (2 * _Nx <= stateIdx)
300 refill_lower();
301
302 using namespace Eigen::internal;
303
304 Packet res = state[stateIdx++];
305 res = pxor(res, pand(psrl64<_Ux>(res), pseti64<Packet>(_Dx)));
306 res = pxor(res, pand(psll64<_Sx>(res), pseti64<Packet>(_Bx)));
307 res = pxor(res, pand(psll64<_Tx>(res), pseti64<Packet>(_Cx)));
308 res = pxor(res, psrl64<_Lx>(res));
309 return res;
310 }
311
317 void discard(unsigned long long num)
318 {
319 for (; 0 < num; --num)
320 {
321 operator()();
322 }
323 }
324
325 typename internal::HalfPacket<Packet>::type half()
326 {
327 if (valid)
328 {
329 valid = false;
330 return cache;
331 }
332 typename internal::HalfPacket<Packet>::type a;
333 internal::split_two(operator()(), a, cache);
334 valid = true;
335 return a;
336 }
337
338 protected:
339
340 void refill_lower()
341 {
342 using namespace Eigen::internal;
343
344 auto hmask = pseti64<Packet>(_hMask),
345 lmask = pseti64<Packet>(_lMask),
346 px = pseti64<Packet>(_Px),
347 one = pseti64<Packet>(1);
348
349 int i;
350 for (i = 0; i < _Nx - _Mx; ++i)
351 {
352 Packet tmp = por(pand(state[i + _Nx], hmask),
353 pand(state[i + _Nx + 1], lmask));
354
355 state[i] = pxor(pxor(
356 psrl64<1>(tmp),
357 pand(pcmpeq64(pand(tmp, one), one), px)),
358 state[i + _Nx + _Mx]
359 );
360 }
361
362 for (; i < _Nx - 1; ++i)
363 {
364 Packet tmp = por(pand(state[i + _Nx], hmask),
365 pand(state[i + _Nx + 1], lmask));
366
367 state[i] = pxor(pxor(
368 psrl64<1>(tmp),
369 pand(pcmpeq64(pand(tmp, one), one), px)),
370 state[i - _Nx + _Mx]
371 );
372 }
373
374 Packet tmp = por(pand(state[i + _Nx], hmask),
375 pand(state[0], lmask));
376 state[i] = pxor(pxor(
377 psrl64<1>(tmp),
378 pand(pcmpeq64(pand(tmp, one), one), px)),
379 state[_Mx - 1]
380 );
381 stateIdx = 0;
382 }
383
384 void refill_upper()
385 {
386 using namespace Eigen::internal;
387
388 auto hmask = pseti64<Packet>(_hMask),
389 lmask = pseti64<Packet>(_lMask),
390 px = pseti64<Packet>(_Px),
391 one = pseti64<Packet>(1);
392
393 for (int i = _Nx; i < 2 * _Nx; ++i)
394 {
395 Packet tmp = por(pand(state[i - _Nx], hmask),
396 pand(state[i - _Nx + 1], lmask));
397
398 state[i] = pxor(pxor(
399 psrl64<1>(tmp),
400 pand(pcmpeq64(pand(tmp, one), one), px)),
401 state[i - _Nx + _Mx]
402 );
403 }
404 }
405
406 AlignedArray<Packet, _Nx * 2> state;
407 size_t stateIdx = 0;
408 typename internal::HalfPacket<Packet>::type cache;
409 bool valid = false;
410
411 static constexpr uint64_t _wMask = (uint64_t)-1;
412 static constexpr uint64_t _hMask = (_wMask << _Rx) & _wMask;
413 static constexpr uint64_t _lMask = ~_hMask & _wMask;
414 };
415
421 template<typename Packet>
422 using Pmt19937_64 = MersenneTwister<Packet, 312, 156, 31,
423 0xb5026f5aa96619e9, 29,
424 0x5555555555555555, 17,
425 0x71d67fffeda60000, 37,
426 0xfff7eee000000000, 43, 6364136223846793005>;
427#endif
428
429 template<typename UIntType, typename BaseRng, int numU64>
430 class ParallelRandomEngineAdaptor
431 {
432 static_assert(GetRandomEngineType<BaseRng>::value != RandomEngineType::none, "BaseRng must be a kind of Random Engine.");
433 static_assert(GetRandomEngineType<BaseRng>::value != RandomEngineType::scalar, "BaseRng must be a kind of mersenne_twister_engine.");
434 public:
435 using result_type = UIntType;
436
437 ParallelRandomEngineAdaptor(size_t seed = BaseRng::default_seed)
438 {
439 for (int i = 0; i < num_parallel; ++i)
440 {
441 rngs[i].~BaseRng();
442 new (&rngs[i]) BaseRng{ seed + i * u64_stride };
443 }
444 }
445
446 ParallelRandomEngineAdaptor(const BaseRng& o)
447 {
448 for (int i = 0; i < num_parallel; ++i)
449 {
450 rngs[i].~BaseRng();
451 new (&rngs[i]) BaseRng{ o };
452 }
453 }
454
455 ParallelRandomEngineAdaptor(const ParallelRandomEngineAdaptor&) = default;
456 ParallelRandomEngineAdaptor(ParallelRandomEngineAdaptor&&) = default;
457
458 static constexpr result_type min()
459 {
460 return std::numeric_limits<result_type>::min();
461 }
462
463 static constexpr result_type max()
464 {
465 return std::numeric_limits<result_type>::max();
466 }
467
468 result_type operator()()
469 {
470 if (cnt >= buf_size)
471 {
472 refill_buffer();
473 }
474 return buf[cnt++];
475 }
476
477 float uniform_real()
478 {
479 if (fcnt >= fbuf_size)
480 {
481 refill_fbuffer();
482 }
483 return fbuf[fcnt++];
484 }
485 private:
486
487 void refill_buffer()
488 {
489 cnt = 0;
490 for (size_t i = 0; i < num_parallel; ++i)
491 {
492 reinterpret_cast<typename BaseRng::result_type&>(buf[i * result_type_stride]) = rngs[i]();
493 }
494 }
495
496 void refill_fbuffer()
497 {
498 fcnt = 0;
499 for (size_t i = 0; i < num_parallel; ++i)
500 {
501 auto urf = internal::bit_to_ur_float(rngs[i]());
502 reinterpret_cast<decltype(urf)&>(fbuf[i * u64_stride * 2]) = urf;
503 }
504 }
505
506 static constexpr int u64_stride = sizeof(typename BaseRng::result_type) / sizeof(uint64_t);
507 static constexpr int result_type_stride = sizeof(typename BaseRng::result_type) / sizeof(result_type);
508 static constexpr int num_parallel = numU64 / u64_stride;
509 static constexpr int byte_size = sizeof(uint64_t) * numU64;
510 static constexpr size_t buf_size = byte_size / sizeof(result_type);
511 static constexpr size_t fbuf_size = byte_size / sizeof(float);
512
513 std::array<BaseRng, num_parallel> rngs;
514 AlignedArray<result_type, buf_size> buf;
515 AlignedArray<float, fbuf_size> fbuf;
516 size_t cnt = buf_size, fcnt = fbuf_size;
517 };
518
525 template<typename UIntType, typename BaseRng>
526 using PacketRandomEngineAdaptor = ParallelRandomEngineAdaptor<UIntType, BaseRng,
527 sizeof(typename BaseRng::result_type) / sizeof(uint64_t)>;
528
529 template<typename BaseRng>
530 class RandomEngineWrapper : public BaseRng
531 {
532 public:
533 using BaseRng::BaseRng;
534
535 RandomEngineWrapper(const BaseRng& o) : BaseRng{ o }
536 {
537 }
538
539 RandomEngineWrapper(BaseRng&& o) : BaseRng{ o }
540 {
541 }
542
543 RandomEngineWrapper(size_t seed) : BaseRng{ seed }
544 {
545 }
546
547 RandomEngineWrapper() = default;
548 RandomEngineWrapper(const RandomEngineWrapper&) = default;
549 RandomEngineWrapper(RandomEngineWrapper&&) = default;
550
551 float uniform_real()
552 {
553 internal::BitScalar<float> bs;
554 return bs.to_ur(this->operator()());
555 }
556 };
557
558 template<typename UIntType, typename Rng>
559 using UniversalRandomEngine = typename std::conditional<
560 IsPacketRandomEngine<typename std::remove_reference<Rng>::type>::value,
561 PacketRandomEngineAdaptor<UIntType, typename std::remove_reference<Rng>::type>,
562 typename std::conditional<
563 IsScalarFullBitRandomEngine<typename std::remove_reference<Rng>::type>::value,
564 RandomEngineWrapper<typename std::remove_reference<Rng>::type>,
565 void
566 >::type
567 >::type;
568
577 template<typename UIntType, typename Rng>
578 UniversalRandomEngine<UIntType, Rng> makeUniversalRng(Rng&& rng)
579 {
580 static_assert(IsPacketRandomEngine<typename std::remove_reference<Rng>::type>::value || IsScalarFullBitRandomEngine<typename std::remove_reference<Rng>::type>::value,
581 "`Rng` must be a kind of RandomPacketEngine like std::mersenne_twister_engine");
582 return { std::forward<Rng>(rng) };
583 }
584
585#ifdef EIGEN_VECTORIZE_AVX2
586 using Vmt19937_64 = Pmt19937_64<internal::Packet8i>;
587#elif defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_SSE2) || defined(EIGEN_VECTORIZE_NEON)
588 using Vmt19937_64 = Pmt19937_64<internal::Packet4i>;
589#else
598 using Vmt19937_64 = std::mt19937_64;
599#endif
600 template<typename UIntType = uint64_t>
601 using P8_mt19937 = ParallelRandomEngineAdaptor<UIntType, Vmt19937_64, 8>;
602
607 using P8_mt19937_64 = P8_mt19937<uint64_t>;
608
609 using P8_mt19937_64_32 = P8_mt19937<uint32_t>;
610 }
611}
612
613#endif
A vectorized version of Mersenne Twister Engine.
Definition: PacketRandomEngine.h:204
MersenneTwister(Packet x0)
Construct a new Mersenne Twister engine with a packet seed.
Definition: PacketRandomEngine.h:246
void seed(Packet x0)
initialize the engine with a given seed
Definition: PacketRandomEngine.h:256
static constexpr uint64_t max()
maximum value of the result
Definition: PacketRandomEngine.h:282
result_type operator()()
Generates one random packet and advance the internal state.
Definition: PacketRandomEngine.h:295
MersenneTwister(uint64_t x0=default_seed)
Construct a new Mersenne Twister engine with a scalar seed.
Definition: PacketRandomEngine.h:230
static constexpr uint64_t min()
minimum value of the result
Definition: PacketRandomEngine.h:272
void discard(unsigned long long num)
Discards num items being generated.
Definition: PacketRandomEngine.h:317
P8_mt19937< uint64_t > P8_mt19937_64
a vectorized mt19937_64 which generates 8 integers of 64bit simultaneously. It always yields the same...
Definition: PacketRandomEngine.h:607
UniversalRandomEngine< UIntType, Rng > makeUniversalRng(Rng &&rng)
Helper function for making a UniversalRandomEngine.
Definition: PacketRandomEngine.h:578
std::mt19937_64 Vmt19937_64
same as std::mt19937_64 when EIGEN_DONT_VECTORIZE, Pmt19937_64<internal::Packet4i> when SSE2 enabled ...
Definition: PacketRandomEngine.h:598
ParallelRandomEngineAdaptor< UIntType, BaseRng, sizeof(typename BaseRng::result_type)/sizeof(uint64_t)> PacketRandomEngineAdaptor
Scalar adaptor for random engines which generates packet.
Definition: PacketRandomEngine.h:527