EigenRand  0.3.0
PacketRandomEngine.h
Go to the documentation of this file.
1 
12 #ifndef EIGENRAND_PACKET_RANDOM_ENGINE_H
13 #define EIGENRAND_PACKET_RANDOM_ENGINE_H
14 
15 #include <array>
16 #include <random>
17 #include <type_traits>
19 #include <fstream>
20 
21 namespace Eigen
22 {
23  namespace internal
24  {
25  template<typename Ty>
26  struct IsIntPacket : std::false_type {};
27 
28  template<typename Ty>
29  struct HalfPacket;
30 
31 #ifdef EIGEN_VECTORIZE_AVX2
32  template<>
33  struct IsIntPacket<Packet8i> : std::true_type {};
34 
35  template<>
36  struct HalfPacket<Packet8i>
37  {
38  using type = Packet4i;
39  };
40 #endif
41 #ifdef EIGEN_VECTORIZE_SSE2
42  template<>
43  struct IsIntPacket<Packet4i> : std::true_type {};
44 
45  template<>
46  struct HalfPacket<Packet4i>
47  {
48  using type = uint64_t;
49  };
50 #endif
51  }
52 
53  namespace Rand
54  {
55  namespace detail
56  {
57  template<typename T>
58  auto test_integral_result_type(int)->std::integral_constant<bool, std::is_integral<typename T::result_type>::value>;
59 
60  template<typename T>
61  auto test_integral_result_type(...)->std::false_type;
62 
63  template<typename T>
64  auto test_intpacket_result_type(int)->std::integral_constant<bool, internal::IsIntPacket<typename T::result_type>::value>;
65 
66  template<typename T>
67  auto test_intpacket_result_type(...)->std::false_type;
68  }
69 
70  template<typename Ty>
71  struct IsScalarRandomEngine : decltype(detail::test_integral_result_type<Ty>(0))
72  {
73  };
74 
75  template<typename Ty>
76  struct IsPacketRandomEngine : decltype(detail::test_intpacket_result_type<Ty>(0))
77  {
78  };
79 
80  enum class RandomEngineType
81  {
82  none, scalar, packet
83  };
84 
85  template<typename Ty>
86  struct GetRandomEngineType : std::integral_constant <
87  RandomEngineType,
88  IsPacketRandomEngine<Ty>::value ? RandomEngineType::packet :
89  (IsScalarRandomEngine<Ty>::value ? RandomEngineType::scalar : RandomEngineType::none)
90  >
91  {
92  };
93 
94  template<typename Ty, size_t length, size_t alignment = 64>
95  class AlignedArray
96  {
97  public:
98  AlignedArray()
99  {
100  allocate();
101  for (size_t i = 0; i < length; ++i)
102  {
103  new (&aligned[i]) Ty();
104  }
105  }
106 
107  AlignedArray(const AlignedArray& o)
108  {
109  allocate();
110  for (size_t i = 0; i < length; ++i)
111  {
112  aligned[i] = o[i];
113  }
114  }
115 
116  AlignedArray(AlignedArray&& o)
117  {
118  std::swap(memory, o.memory);
119  std::swap(aligned, o.aligned);
120  }
121 
122  AlignedArray& operator=(const AlignedArray& o)
123  {
124  for (size_t i = 0; i < length; ++i)
125  {
126  aligned[i] = o[i];
127  }
128  return *this;
129  }
130 
131  AlignedArray& operator=(AlignedArray&& o)
132  {
133  std::swap(memory, o.memory);
134  std::swap(aligned, o.aligned);
135  return *this;
136  }
137 
138  ~AlignedArray()
139  {
140  deallocate();
141  }
142 
143  Ty& operator[](size_t i)
144  {
145  return aligned[i];
146  }
147 
148  const Ty& operator[](size_t i) const
149  {
150  return aligned[i];
151  }
152 
153  size_t size() const
154  {
155  return length;
156  }
157 
158  Ty* data()
159  {
160  return aligned;
161  }
162 
163  const Ty* data() const
164  {
165  return aligned;
166  }
167 
168  private:
169  void allocate()
170  {
171  memory = std::malloc(sizeof(Ty) * length + alignment);
172  aligned = (Ty*)(((size_t)memory + alignment) & ~(alignment - 1));
173  }
174 
175  void deallocate()
176  {
177  if (memory)
178  {
179  for (size_t i = 0; i < length; ++i)
180  {
181  aligned[i].~Ty();
182  }
183  std::free(memory);
184  memory = nullptr;
185  aligned = nullptr;
186  }
187  }
188 
189  void* memory = nullptr;
190  Ty* aligned = nullptr;
191  };
192 
193 #ifndef EIGEN_DONT_VECTORIZE
194 
214  template<typename Packet,
215  int _Nx, int _Mx,
216  int _Rx, uint64_t _Px,
217  int _Ux, uint64_t _Dx,
218  int _Sx, uint64_t _Bx,
219  int _Tx, uint64_t _Cx,
220  int _Lx, uint64_t _Fx>
222  {
223  public:
224  using result_type = Packet;
225 
226  static constexpr int word_size = 64;
227  static constexpr int state_size = _Nx;
228  static constexpr int shift_size = _Mx;
229  static constexpr int mask_bits = _Rx;
230  static constexpr uint64_t parameter_a = _Px;
231  static constexpr int output_u = _Ux;
232  static constexpr int output_s = _Sx;
233  static constexpr uint64_t output_b = _Bx;
234  static constexpr int output_t = _Tx;
235  static constexpr uint64_t output_c = _Cx;
236  static constexpr int output_l = _Lx;
237 
238  static constexpr uint64_t default_seed = 5489U;
239 
248  MersenneTwister(uint64_t x0 = default_seed)
249  {
250  using namespace Eigen::internal;
251  std::array<uint64_t, unpacket_traits<Packet>::size / 2> seeds;
252  for (uint64_t i = 0; i < seeds.size(); ++i)
253  {
254  seeds[i] = x0 + i;
255  }
256  seed(ploadu<Packet>((int*)seeds.data()));
257  }
258 
264  MersenneTwister(Packet x0)
265  {
266  seed(x0);
267  }
268 
274  void seed(Packet x0)
275  {
276  using namespace Eigen::internal;
277  Packet prev = state[0] = x0;
278  for (int i = 1; i < _Nx; ++i)
279  {
280  prev = state[i] = pmuluadd64(pxor(prev, psrl64(prev, word_size - 2)), _Fx, i);
281  }
282  stateIdx = _Nx;
283  }
284 
290  uint64_t min() const
291  {
292  return 0;
293  }
294 
300  uint64_t max() const
301  {
302  return _wMask;
303  }
304 
313  result_type operator()()
314  {
315  if (stateIdx == _Nx)
316  refill_upper();
317  else if (2 * _Nx <= stateIdx)
318  refill_lower();
319 
320  using namespace Eigen::internal;
321 
322  Packet res = state[stateIdx++];
323  res = pxor(res, pand(psrl64(res, _Ux), pseti64<Packet>(_Dx)));
324  res = pxor(res, pand(psll64(res, _Sx), pseti64<Packet>(_Bx)));
325  res = pxor(res, pand(psll64(res, _Tx), pseti64<Packet>(_Cx)));
326  res = pxor(res, psrl64(res, _Lx));
327  return res;
328  }
329 
335  void discard(unsigned long long num)
336  {
337  for (; 0 < num; --num)
338  {
339  operator()();
340  }
341  }
342 
343  typename internal::HalfPacket<Packet>::type half()
344  {
345  if (valid)
346  {
347  valid = false;
348  return cache;
349  }
350  typename internal::HalfPacket<Packet>::type a;
351  internal::split_two(operator()(), a, cache);
352  valid = true;
353  return a;
354  }
355 
356  protected:
357 
358  void refill_lower()
359  {
360  using namespace Eigen::internal;
361 
362  auto hmask = pseti64<Packet>(_hMask),
363  lmask = pseti64<Packet>(_lMask),
364  px = pseti64<Packet>(_Px),
365  one = pseti64<Packet>(1);
366 
367  int i;
368  for (i = 0; i < _Nx - _Mx; ++i)
369  {
370  Packet tmp = por(pand(state[i + _Nx], hmask),
371  pand(state[i + _Nx + 1], lmask));
372 
373  state[i] = pxor(pxor(
374  psrl64(tmp, 1),
375  pand(pcmpeq64(pand(tmp, one), one), px)),
376  state[i + _Nx + _Mx]
377  );
378  }
379 
380  for (; i < _Nx - 1; ++i)
381  {
382  Packet tmp = por(pand(state[i + _Nx], hmask),
383  pand(state[i + _Nx + 1], lmask));
384 
385  state[i] = pxor(pxor(
386  psrl64(tmp, 1),
387  pand(pcmpeq64(pand(tmp, one), one), px)),
388  state[i - _Nx + _Mx]
389  );
390  }
391 
392  Packet tmp = por(pand(state[i + _Nx], hmask),
393  pand(state[0], lmask));
394  state[i] = pxor(pxor(
395  psrl64(tmp, 1),
396  pand(pcmpeq64(pand(tmp, one), one), px)),
397  state[_Mx - 1]
398  );
399  stateIdx = 0;
400  }
401 
402  void refill_upper()
403  {
404  using namespace Eigen::internal;
405 
406  auto hmask = pseti64<Packet>(_hMask),
407  lmask = pseti64<Packet>(_lMask),
408  px = pseti64<Packet>(_Px),
409  one = pseti64<Packet>(1);
410 
411  for (int i = _Nx; i < 2 * _Nx; ++i)
412  {
413  Packet tmp = por(pand(state[i - _Nx], hmask),
414  pand(state[i - _Nx + 1], lmask));
415 
416  state[i] = pxor(pxor(
417  psrl64(tmp, 1),
418  pand(pcmpeq64(pand(tmp, one), one), px)),
419  state[i - _Nx + _Mx]
420  );
421  }
422  }
423 
424  AlignedArray<Packet, _Nx * 2> state;
425  size_t stateIdx = 0;
426  typename internal::HalfPacket<Packet>::type cache;
427  bool valid = false;
428 
429  static constexpr uint64_t _wMask = (uint64_t)-1;
430  static constexpr uint64_t _hMask = (_wMask << _Rx) & _wMask;
431  static constexpr uint64_t _lMask = ~_hMask & _wMask;
432  };
433 
439  template<typename Packet>
440  using Pmt19937_64 = MersenneTwister<Packet, 312, 156, 31,
441  0xb5026f5aa96619e9, 29,
442  0x5555555555555555, 17,
443  0x71d67fffeda60000, 37,
444  0xfff7eee000000000, 43, 6364136223846793005>;
445 #endif
446 
447  template<typename UIntType, typename BaseRng, int numU64>
448  class ParallelRandomEngineAdaptor
449  {
450  static_assert(GetRandomEngineType<BaseRng>::value != RandomEngineType::none, "BaseRng must be a kind of Random Engine.");
451  public:
452  using result_type = UIntType;
453 
454  ParallelRandomEngineAdaptor(size_t seed = BaseRng::default_seed)
455  {
456  for (int i = 0; i < num_parallel; ++i)
457  {
458  rngs[i].~BaseRng();
459  new (&rngs[i]) BaseRng{ seed + i * u64_stride };
460  }
461  }
462 
463  ParallelRandomEngineAdaptor(const BaseRng& o)
464  {
465  for (int i = 0; i < num_parallel; ++i)
466  {
467  rngs[i].~BaseRng();
468  new (&rngs[i]) BaseRng{ o };
469  }
470  }
471 
472  ParallelRandomEngineAdaptor(const ParallelRandomEngineAdaptor&) = default;
473  ParallelRandomEngineAdaptor(ParallelRandomEngineAdaptor&&) = default;
474 
475  static constexpr result_type min()
476  {
477  return std::numeric_limits<result_type>::min();
478  }
479 
480  static constexpr result_type max()
481  {
482  return std::numeric_limits<result_type>::max();
483  }
484 
485  result_type operator()()
486  {
487  if (cnt >= buf_size)
488  {
489  refill_buffer();
490  }
491  return buf[cnt++];
492  }
493 
494  float uniform_real()
495  {
496  if (fcnt >= fbuf_size)
497  {
498  refill_fbuffer();
499  }
500  return fbuf[fcnt++];
501  }
502  private:
503 
504  void refill_buffer()
505  {
506  cnt = 0;
507  for (size_t i = 0; i < num_parallel; ++i)
508  {
509  reinterpret_cast<typename BaseRng::result_type&>(buf[i * result_type_stride]) = rngs[i]();
510  }
511  }
512 
513  void refill_fbuffer()
514  {
515  fcnt = 0;
516  for (size_t i = 0; i < num_parallel; ++i)
517  {
518  auto urf = internal::bit_to_ur_float(rngs[i]());
519  reinterpret_cast<decltype(urf)&>(fbuf[i * u64_stride * 2]) = urf;
520  }
521  }
522 
523  static constexpr int u64_stride = sizeof(typename BaseRng::result_type) / sizeof(uint64_t);
524  static constexpr int result_type_stride = sizeof(typename BaseRng::result_type) / sizeof(result_type);
525  static constexpr int num_parallel = numU64 / u64_stride;
526  static constexpr int byte_size = sizeof(uint64_t) * numU64;
527  static constexpr size_t buf_size = byte_size / sizeof(result_type);
528  static constexpr size_t fbuf_size = byte_size / sizeof(float);
529 
530  std::array<BaseRng, num_parallel> rngs;
531  AlignedArray<result_type, buf_size> buf;
532  AlignedArray<float, fbuf_size> fbuf;
533  size_t cnt = buf_size, fcnt = fbuf_size;
534  };
535 
542  template<typename UIntType, typename BaseRng>
543  using PacketRandomEngineAdaptor = ParallelRandomEngineAdaptor<UIntType, BaseRng,
544  sizeof(typename BaseRng::result_type) / sizeof(uint64_t)>;
545 
546  template<typename BaseRng>
547  class RandomEngineWrapper : public BaseRng
548  {
549  public:
550  using BaseRng::BaseRng;
551 
552  RandomEngineWrapper(const BaseRng& o) : BaseRng{ o }
553  {
554  }
555 
556  RandomEngineWrapper(BaseRng&& o) : BaseRng{ o }
557  {
558  }
559 
560  RandomEngineWrapper(size_t seed) : BaseRng{ seed }
561  {
562  }
563 
564  RandomEngineWrapper() = default;
565  RandomEngineWrapper(const RandomEngineWrapper&) = default;
566  RandomEngineWrapper(RandomEngineWrapper&&) = default;
567 
568  float uniform_real()
569  {
570  internal::bit_scalar<float> bs;
571  return bs.to_ur(this->operator()());
572  }
573  };
574 
575  template<typename UIntType, typename Rng>
576  using UniversalRandomEngine = typename std::conditional<
577  IsPacketRandomEngine<typename std::remove_reference<Rng>::type>::value,
578  PacketRandomEngineAdaptor<UIntType, typename std::remove_reference<Rng>::type>,
579  typename std::conditional<
580  IsScalarRandomEngine<typename std::remove_reference<Rng>::type>::value,
581  RandomEngineWrapper<typename std::remove_reference<Rng>::type>,
582  void
583  >::type
584  >::type;
585 
594  template<typename UIntType, typename Rng>
595  UniversalRandomEngine<UIntType, Rng> makeUniversalRng(Rng&& rng)
596  {
597  return { std::forward<Rng>(rng) };
598  }
599 
600 #ifdef EIGEN_VECTORIZE_AVX2
601  using Vmt19937_64 = Pmt19937_64<internal::Packet8i>;
602 #elif defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_SSE2)
603  using Vmt19937_64 = Pmt19937_64<internal::Packet4i>;
604 #else
605 
613  using Vmt19937_64 = std::mt19937_64;
614 #endif
615 
619  template<typename UIntType = uint64_t>
620  using P8_mt19937_64 = ParallelRandomEngineAdaptor<UIntType, Vmt19937_64, 8>;
621  }
622 }
623 
624 #endif
Eigen::Rand::MersenneTwister::min
uint64_t min() const
minimum value of the result
Definition: PacketRandomEngine.h:290
Eigen::Rand::MersenneTwister
A vectorized version of Mersenne Twister Engine.
Definition: PacketRandomEngine.h:222
MorePacketMath.h
Eigen::Rand::MersenneTwister::MersenneTwister
MersenneTwister(Packet x0)
Construct a new Mersenne Twister engine with a packet seed.
Definition: PacketRandomEngine.h:264
Eigen::Rand::MersenneTwister::discard
void discard(unsigned long long num)
Discards num items being generated.
Definition: PacketRandomEngine.h:335
Eigen::Rand::Vmt19937_64
std::mt19937_64 Vmt19937_64
same as std::mt19937_64 when EIGEN_DONT_VECTORIZE, Pmt19937_64<internal::Packet4i> when SSE2 enabled ...
Definition: PacketRandomEngine.h:613
Eigen::Rand::MersenneTwister::operator()
result_type operator()()
Generates one random packet and advance the internal state.
Definition: PacketRandomEngine.h:313
Eigen::Rand::makeUniversalRng
UniversalRandomEngine< UIntType, Rng > makeUniversalRng(Rng &&rng)
Helper function for making a UniversalRandomEngine.
Definition: PacketRandomEngine.h:595
Eigen::Rand::MersenneTwister::max
uint64_t max() const
maximum value of the result
Definition: PacketRandomEngine.h:300
Eigen::Rand::MersenneTwister::seed
void seed(Packet x0)
initialize the engine with a given seed
Definition: PacketRandomEngine.h:274
Eigen::Rand::MersenneTwister::MersenneTwister
MersenneTwister(uint64_t x0=default_seed)
Construct a new Mersenne Twister engine with a scalar seed.
Definition: PacketRandomEngine.h:248
Eigen::Rand::P8_mt19937_64
ParallelRandomEngineAdaptor< UIntType, Vmt19937_64, 8 > P8_mt19937_64
a vectorized mt19937_64 which generates 8 integers of 64bit simultaneously. It always yields the same...
Definition: PacketRandomEngine.h:620
Eigen::Rand::PacketRandomEngineAdaptor
ParallelRandomEngineAdaptor< UIntType, BaseRng, sizeof(typename BaseRng::result_type)/sizeof(uint64_t)> PacketRandomEngineAdaptor
Scalar adaptor for random engines which generates packet.
Definition: PacketRandomEngine.h:544