EigenRand  0.3.0
PacketRandomEngine.h
Go to the documentation of this file.
1 
12 #ifndef EIGENRAND_PACKET_RANDOM_ENGINE_H
13 #define EIGENRAND_PACKET_RANDOM_ENGINE_H
14 
15 #include <array>
16 #include <random>
17 #include <type_traits>
19 #include <fstream>
20 
21 namespace Eigen
22 {
23  namespace Rand
24  {
25  namespace detail
26  {
27  template<typename T>
28  auto test_integral_result_type(int)->std::integral_constant<bool, std::is_integral<typename T::result_type>::value>;
29 
30  template<typename T>
31  auto test_integral_result_type(...)->std::false_type;
32 
33  template<typename T>
34  auto test_intpacket_result_type(int)->std::integral_constant<bool, internal::IsIntPacket<typename T::result_type>::value>;
35 
36  template<typename T>
37  auto test_intpacket_result_type(...)->std::false_type;
38  }
39 
40  template<typename Ty>
41  struct IsScalarRandomEngine : decltype(detail::test_integral_result_type<Ty>(0))
42  {
43  };
44 
45  template<typename Ty>
46  struct IsPacketRandomEngine : decltype(detail::test_intpacket_result_type<Ty>(0))
47  {
48  };
49 
50  enum class RandomEngineType
51  {
52  none, scalar, packet
53  };
54 
55  template<typename Ty>
56  struct GetRandomEngineType : std::integral_constant <
57  RandomEngineType,
58  IsPacketRandomEngine<Ty>::value ? RandomEngineType::packet :
59  (IsScalarRandomEngine<Ty>::value ? RandomEngineType::scalar : RandomEngineType::none)
60  >
61  {
62  };
63 
64  template<typename Ty, size_t length, size_t alignment = 64>
65  class AlignedArray
66  {
67  public:
68  AlignedArray()
69  {
70  allocate();
71  for (size_t i = 0; i < length; ++i)
72  {
73  new (&aligned[i]) Ty();
74  }
75  }
76 
77  AlignedArray(const AlignedArray& o)
78  {
79  allocate();
80  for (size_t i = 0; i < length; ++i)
81  {
82  aligned[i] = o[i];
83  }
84  }
85 
86  AlignedArray(AlignedArray&& o)
87  {
88  std::swap(memory, o.memory);
89  std::swap(aligned, o.aligned);
90  }
91 
92  AlignedArray& operator=(const AlignedArray& o)
93  {
94  for (size_t i = 0; i < length; ++i)
95  {
96  aligned[i] = o[i];
97  }
98  return *this;
99  }
100 
101  AlignedArray& operator=(AlignedArray&& o)
102  {
103  std::swap(memory, o.memory);
104  std::swap(aligned, o.aligned);
105  return *this;
106  }
107 
108  ~AlignedArray()
109  {
110  deallocate();
111  }
112 
113  Ty& operator[](size_t i)
114  {
115  return aligned[i];
116  }
117 
118  const Ty& operator[](size_t i) const
119  {
120  return aligned[i];
121  }
122 
123  size_t size() const
124  {
125  return length;
126  }
127 
128  Ty* data()
129  {
130  return aligned;
131  }
132 
133  const Ty* data() const
134  {
135  return aligned;
136  }
137 
138  private:
139  void allocate()
140  {
141  memory = std::malloc(sizeof(Ty) * length + alignment);
142  aligned = (Ty*)(((size_t)memory + alignment) & ~(alignment - 1));
143  }
144 
145  void deallocate()
146  {
147  if (memory)
148  {
149  for (size_t i = 0; i < length; ++i)
150  {
151  aligned[i].~Ty();
152  }
153  std::free(memory);
154  memory = nullptr;
155  aligned = nullptr;
156  }
157  }
158 
159  void* memory = nullptr;
160  Ty* aligned = nullptr;
161  };
162 
163 #ifndef EIGEN_DONT_VECTORIZE
184  template<typename Packet,
185  int _Nx, int _Mx,
186  int _Rx, uint64_t _Px,
187  int _Ux, uint64_t _Dx,
188  int _Sx, uint64_t _Bx,
189  int _Tx, uint64_t _Cx,
190  int _Lx, uint64_t _Fx>
192  {
193  public:
194  using result_type = Packet;
195 
196  static constexpr int word_size = 64;
197  static constexpr int state_size = _Nx;
198  static constexpr int shift_size = _Mx;
199  static constexpr int mask_bits = _Rx;
200  static constexpr uint64_t parameter_a = _Px;
201  static constexpr int output_u = _Ux;
202  static constexpr int output_s = _Sx;
203  static constexpr uint64_t output_b = _Bx;
204  static constexpr int output_t = _Tx;
205  static constexpr uint64_t output_c = _Cx;
206  static constexpr int output_l = _Lx;
207 
208  static constexpr uint64_t default_seed = 5489U;
209 
218  MersenneTwister(uint64_t x0 = default_seed)
219  {
220  using namespace Eigen::internal;
221  std::array<uint64_t, unpacket_traits<Packet>::size / 2> seeds;
222  for (uint64_t i = 0; i < seeds.size(); ++i)
223  {
224  seeds[i] = x0 + i;
225  }
226  seed(ploadu<Packet>((int*)seeds.data()));
227  }
228 
234  MersenneTwister(Packet x0)
235  {
236  seed(x0);
237  }
238 
244  void seed(Packet x0)
245  {
246  using namespace Eigen::internal;
247  Packet prev = state[0] = x0;
248  for (int i = 1; i < _Nx; ++i)
249  {
250  prev = state[i] = pmuluadd64(pxor(prev, psrl64(prev, word_size - 2)), _Fx, i);
251  }
252  stateIdx = _Nx;
253  }
254 
260  uint64_t min() const
261  {
262  return 0;
263  }
264 
270  uint64_t max() const
271  {
272  return _wMask;
273  }
274 
283  result_type operator()()
284  {
285  if (stateIdx == _Nx)
286  refill_upper();
287  else if (2 * _Nx <= stateIdx)
288  refill_lower();
289 
290  using namespace Eigen::internal;
291 
292  Packet res = state[stateIdx++];
293  res = pxor(res, pand(psrl64(res, _Ux), pseti64<Packet>(_Dx)));
294  res = pxor(res, pand(psll64(res, _Sx), pseti64<Packet>(_Bx)));
295  res = pxor(res, pand(psll64(res, _Tx), pseti64<Packet>(_Cx)));
296  res = pxor(res, psrl64(res, _Lx));
297  return res;
298  }
299 
305  void discard(unsigned long long num)
306  {
307  for (; 0 < num; --num)
308  {
309  operator()();
310  }
311  }
312 
313  typename internal::HalfPacket<Packet>::type half()
314  {
315  if (valid)
316  {
317  valid = false;
318  return cache;
319  }
320  typename internal::HalfPacket<Packet>::type a;
321  internal::split_two(operator()(), a, cache);
322  valid = true;
323  return a;
324  }
325 
326  protected:
327 
328  void refill_lower()
329  {
330  using namespace Eigen::internal;
331 
332  auto hmask = pseti64<Packet>(_hMask),
333  lmask = pseti64<Packet>(_lMask),
334  px = pseti64<Packet>(_Px),
335  one = pseti64<Packet>(1);
336 
337  int i;
338  for (i = 0; i < _Nx - _Mx; ++i)
339  {
340  Packet tmp = por(pand(state[i + _Nx], hmask),
341  pand(state[i + _Nx + 1], lmask));
342 
343  state[i] = pxor(pxor(
344  psrl64(tmp, 1),
345  pand(pcmpeq64(pand(tmp, one), one), px)),
346  state[i + _Nx + _Mx]
347  );
348  }
349 
350  for (; i < _Nx - 1; ++i)
351  {
352  Packet tmp = por(pand(state[i + _Nx], hmask),
353  pand(state[i + _Nx + 1], lmask));
354 
355  state[i] = pxor(pxor(
356  psrl64(tmp, 1),
357  pand(pcmpeq64(pand(tmp, one), one), px)),
358  state[i - _Nx + _Mx]
359  );
360  }
361 
362  Packet tmp = por(pand(state[i + _Nx], hmask),
363  pand(state[0], lmask));
364  state[i] = pxor(pxor(
365  psrl64(tmp, 1),
366  pand(pcmpeq64(pand(tmp, one), one), px)),
367  state[_Mx - 1]
368  );
369  stateIdx = 0;
370  }
371 
372  void refill_upper()
373  {
374  using namespace Eigen::internal;
375 
376  auto hmask = pseti64<Packet>(_hMask),
377  lmask = pseti64<Packet>(_lMask),
378  px = pseti64<Packet>(_Px),
379  one = pseti64<Packet>(1);
380 
381  for (int i = _Nx; i < 2 * _Nx; ++i)
382  {
383  Packet tmp = por(pand(state[i - _Nx], hmask),
384  pand(state[i - _Nx + 1], lmask));
385 
386  state[i] = pxor(pxor(
387  psrl64(tmp, 1),
388  pand(pcmpeq64(pand(tmp, one), one), px)),
389  state[i - _Nx + _Mx]
390  );
391  }
392  }
393 
394  AlignedArray<Packet, _Nx * 2> state;
395  size_t stateIdx = 0;
396  typename internal::HalfPacket<Packet>::type cache;
397  bool valid = false;
398 
399  static constexpr uint64_t _wMask = (uint64_t)-1;
400  static constexpr uint64_t _hMask = (_wMask << _Rx) & _wMask;
401  static constexpr uint64_t _lMask = ~_hMask & _wMask;
402  };
403 
409  template<typename Packet>
410  using Pmt19937_64 = MersenneTwister<Packet, 312, 156, 31,
411  0xb5026f5aa96619e9, 29,
412  0x5555555555555555, 17,
413  0x71d67fffeda60000, 37,
414  0xfff7eee000000000, 43, 6364136223846793005>;
415 #endif
416 
417  template<typename UIntType, typename BaseRng, int numU64>
418  class ParallelRandomEngineAdaptor
419  {
420  static_assert(GetRandomEngineType<BaseRng>::value != RandomEngineType::none, "BaseRng must be a kind of Random Engine.");
421  public:
422  using result_type = UIntType;
423 
424  ParallelRandomEngineAdaptor(size_t seed = BaseRng::default_seed)
425  {
426  for (int i = 0; i < num_parallel; ++i)
427  {
428  rngs[i].~BaseRng();
429  new (&rngs[i]) BaseRng{ seed + i * u64_stride };
430  }
431  }
432 
433  ParallelRandomEngineAdaptor(const BaseRng& o)
434  {
435  for (int i = 0; i < num_parallel; ++i)
436  {
437  rngs[i].~BaseRng();
438  new (&rngs[i]) BaseRng{ o };
439  }
440  }
441 
442  ParallelRandomEngineAdaptor(const ParallelRandomEngineAdaptor&) = default;
443  ParallelRandomEngineAdaptor(ParallelRandomEngineAdaptor&&) = default;
444 
445  static constexpr result_type min()
446  {
447  return std::numeric_limits<result_type>::min();
448  }
449 
450  static constexpr result_type max()
451  {
452  return std::numeric_limits<result_type>::max();
453  }
454 
455  result_type operator()()
456  {
457  if (cnt >= buf_size)
458  {
459  refill_buffer();
460  }
461  return buf[cnt++];
462  }
463 
464  float uniform_real()
465  {
466  if (fcnt >= fbuf_size)
467  {
468  refill_fbuffer();
469  }
470  return fbuf[fcnt++];
471  }
472  private:
473 
474  void refill_buffer()
475  {
476  cnt = 0;
477  for (size_t i = 0; i < num_parallel; ++i)
478  {
479  reinterpret_cast<typename BaseRng::result_type&>(buf[i * result_type_stride]) = rngs[i]();
480  }
481  }
482 
483  void refill_fbuffer()
484  {
485  fcnt = 0;
486  for (size_t i = 0; i < num_parallel; ++i)
487  {
488  auto urf = internal::bit_to_ur_float(rngs[i]());
489  reinterpret_cast<decltype(urf)&>(fbuf[i * u64_stride * 2]) = urf;
490  }
491  }
492 
493  static constexpr int u64_stride = sizeof(typename BaseRng::result_type) / sizeof(uint64_t);
494  static constexpr int result_type_stride = sizeof(typename BaseRng::result_type) / sizeof(result_type);
495  static constexpr int num_parallel = numU64 / u64_stride;
496  static constexpr int byte_size = sizeof(uint64_t) * numU64;
497  static constexpr size_t buf_size = byte_size / sizeof(result_type);
498  static constexpr size_t fbuf_size = byte_size / sizeof(float);
499 
500  std::array<BaseRng, num_parallel> rngs;
501  AlignedArray<result_type, buf_size> buf;
502  AlignedArray<float, fbuf_size> fbuf;
503  size_t cnt = buf_size, fcnt = fbuf_size;
504  };
505 
512  template<typename UIntType, typename BaseRng>
513  using PacketRandomEngineAdaptor = ParallelRandomEngineAdaptor<UIntType, BaseRng,
514  sizeof(typename BaseRng::result_type) / sizeof(uint64_t)>;
515 
516  template<typename BaseRng>
517  class RandomEngineWrapper : public BaseRng
518  {
519  public:
520  using BaseRng::BaseRng;
521 
522  RandomEngineWrapper(const BaseRng& o) : BaseRng{ o }
523  {
524  }
525 
526  RandomEngineWrapper(BaseRng&& o) : BaseRng{ o }
527  {
528  }
529 
530  RandomEngineWrapper(size_t seed) : BaseRng{ seed }
531  {
532  }
533 
534  RandomEngineWrapper() = default;
535  RandomEngineWrapper(const RandomEngineWrapper&) = default;
536  RandomEngineWrapper(RandomEngineWrapper&&) = default;
537 
538  float uniform_real()
539  {
540  internal::bit_scalar<float> bs;
541  return bs.to_ur(this->operator()());
542  }
543  };
544 
545  template<typename UIntType, typename Rng>
546  using UniversalRandomEngine = typename std::conditional<
547  IsPacketRandomEngine<typename std::remove_reference<Rng>::type>::value,
548  PacketRandomEngineAdaptor<UIntType, typename std::remove_reference<Rng>::type>,
549  typename std::conditional<
550  IsScalarRandomEngine<typename std::remove_reference<Rng>::type>::value,
551  RandomEngineWrapper<typename std::remove_reference<Rng>::type>,
552  void
553  >::type
554  >::type;
555 
564  template<typename UIntType, typename Rng>
565  UniversalRandomEngine<UIntType, Rng> makeUniversalRng(Rng&& rng)
566  {
567  return { std::forward<Rng>(rng) };
568  }
569 
570 #ifdef EIGEN_VECTORIZE_AVX2
571  using Vmt19937_64 = Pmt19937_64<internal::Packet8i>;
572 #elif defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_SSE2)
573  using Vmt19937_64 = Pmt19937_64<internal::Packet4i>;
574 #else
583  using Vmt19937_64 = std::mt19937_64;
584 #endif
589  template<typename UIntType = uint64_t>
590  using P8_mt19937_64 = ParallelRandomEngineAdaptor<UIntType, Vmt19937_64, 8>;
591  }
592 }
593 
594 #endif
A vectorized version of Mersenne Twister Engine.
Definition: PacketRandomEngine.h:192
MersenneTwister(Packet x0)
Construct a new Mersenne Twister engine with a packet seed.
Definition: PacketRandomEngine.h:234
uint64_t max() const
maximum value of the result
Definition: PacketRandomEngine.h:270
void seed(Packet x0)
initialize the engine with a given seed
Definition: PacketRandomEngine.h:244
result_type operator()()
Generates one random packet and advance the internal state.
Definition: PacketRandomEngine.h:283
MersenneTwister(uint64_t x0=default_seed)
Construct a new Mersenne Twister engine with a scalar seed.
Definition: PacketRandomEngine.h:218
uint64_t min() const
minimum value of the result
Definition: PacketRandomEngine.h:260
void discard(unsigned long long num)
Discards num items being generated.
Definition: PacketRandomEngine.h:305
ParallelRandomEngineAdaptor< UIntType, Vmt19937_64, 8 > P8_mt19937_64
a vectorized mt19937_64 which generates 8 integers of 64bit simultaneously. It always yields the same...
Definition: PacketRandomEngine.h:590
UniversalRandomEngine< UIntType, Rng > makeUniversalRng(Rng &&rng)
Helper function for making a UniversalRandomEngine.
Definition: PacketRandomEngine.h:565
std::mt19937_64 Vmt19937_64
same as std::mt19937_64 when EIGEN_DONT_VECTORIZE, Pmt19937_64<internal::Packet4i> when SSE2 enabled ...
Definition: PacketRandomEngine.h:583
ParallelRandomEngineAdaptor< UIntType, BaseRng, sizeof(typename BaseRng::result_type)/sizeof(uint64_t)> PacketRandomEngineAdaptor
Scalar adaptor for random engines which generates packet.
Definition: PacketRandomEngine.h:514