RandUtils.h
Go to the documentation of this file.
1 
12 #ifndef EIGENRAND_RAND_UTILS_H
13 #define EIGENRAND_RAND_UTILS_H
14 
16 #include <EigenRand/PacketFilter.h>
18 
19 namespace Eigen
20 {
21  namespace internal
22  {
23  template<typename IntPacket>
24  EIGEN_STRONG_INLINE auto bit_to_ur_float(const IntPacket& x) -> decltype(reinterpret_to_float(x))
25  {
26  using FloatPacket = decltype(reinterpret_to_float(x));
27 
28  const IntPacket lower = pset1<IntPacket>(0x7FFFFF),
29  upper = pset1<IntPacket>(127 << 23);
30  const FloatPacket one = pset1<FloatPacket>(1);
31 
32  return psub(reinterpret_to_float(por(pand(x, lower), upper)), one);
33  }
34 
35  template<typename IntPacket>
36  EIGEN_STRONG_INLINE auto bit_to_ur_double(const IntPacket& x) -> decltype(reinterpret_to_double(x))
37  {
38  using DoublePacket = decltype(reinterpret_to_double(x));
39 
40  const IntPacket lower = pseti64<IntPacket>(0xFFFFFFFFFFFFFull),
41  upper = pseti64<IntPacket>(1023ull << 52);
42  const DoublePacket one = pset1<DoublePacket>(1);
43 
44  return psub(reinterpret_to_double(por(pand(x, lower), upper)), one);
45  }
46 
47  template<typename Scalar>
48  struct bit_scalar;
49 
50  template<>
51  struct bit_scalar<float>
52  {
53  float to_ur(uint32_t x)
54  {
55  union
56  {
57  uint32_t u;
58  float f;
59  };
60  u = (x & 0x7FFFFF) | (127 << 23);
61  return f - 1.f;
62  }
63 
64  float to_nzur(uint32_t x)
65  {
66  return to_ur(x) + std::numeric_limits<float>::epsilon() / 8;
67  }
68  };
69 
70  template<>
71  struct bit_scalar<double>
72  {
73  double to_ur(uint64_t x)
74  {
75  union
76  {
77  uint64_t u;
78  double f;
79  };
80  u = (x & 0xFFFFFFFFFFFFFull) | (1023ull << 52);
81  return f - 1.;
82  }
83 
84  double to_nzur(uint64_t x)
85  {
86  return to_ur(x) + std::numeric_limits<double>::epsilon() / 8;
87  }
88  };
89 
90  template<typename Packet, typename Rng,
91  typename RngResult = typename std::remove_reference<Rng>::type::result_type,
92  Rand::RandomEngineType reType = Rand::GetRandomEngineType<
93  typename std::remove_reference<Rng>::type
94  >::value>
95  struct RawbitsMaker;
96 
97  template<typename PacketType, typename Rng>
98  struct UniformRealUtils;
99 
100  template<typename PacketType, typename Rng>
101  struct RandUtils : public UniformRealUtils<PacketType, Rng>
102  {
103  EIGEN_STRONG_INLINE PacketType balanced(Rng& rng)
104  {
105  return psub(pmul(this->zero_to_one(rng), pset1<PacketType>(2)), pset1<PacketType>(1));
106  }
107 
108  EIGEN_STRONG_INLINE PacketType nonzero_uniform_real(Rng& rng)
109  {
110  constexpr auto epsilon = std::numeric_limits<typename unpacket_traits<PacketType>::type>::epsilon() / 8;
111  return padd(this->uniform_real(rng), pset1<PacketType>(epsilon));
112  }
113  };
114 
115  EIGEN_STRONG_INLINE uint32_t collect_upper8bits(uint32_t a, uint32_t b, uint32_t c)
116  {
117  return ((a & 0xFF000000) >> 24) | ((b & 0xFF000000) >> 16) | ((c & 0xFF000000) >> 8);
118  }
119  }
120 }
121 
122 #ifdef EIGEN_VECTORIZE_AVX
123 #include <immintrin.h>
124 
125 namespace Eigen
126 {
127  namespace internal
128  {
129  template<typename Rng>
130  struct RawbitsMaker<Packet4i, Rng, Packet8i, Rand::RandomEngineType::packet>
131  {
132  EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
133  {
134  return rng.half();
135  }
136 
137  EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
138  {
139  return rng.half();
140  }
141 
142  EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
143  {
144  return rng.half();
145  }
146  };
147 
148  template<typename Rng>
149  struct RawbitsMaker<Packet8i, Rng, Packet4i, Rand::RandomEngineType::packet>
150  {
151  EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
152  {
153  return _mm256_insertf128_si256(_mm256_castsi128_si256(rng()), rng(), 1);
154  }
155 
156  EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
157  {
158  return _mm256_insertf128_si256(_mm256_castsi128_si256(rng()), rng(), 1);
159  }
160 
161  EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
162  {
163  return rng();
164  }
165  };
166 
167  template<typename Rng, typename RngResult>
168  struct RawbitsMaker<Packet8i, Rng, RngResult, Rand::RandomEngineType::scalar>
169  {
170  EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
171  {
172  if (sizeof(decltype(rng())) == 8)
173  {
174  return _mm256_set_epi64x(rng(), rng(), rng(), rng());
175  }
176  else
177  {
178  return _mm256_set_epi32(rng(), rng(), rng(), rng(),
179  rng(), rng(), rng(), rng());
180  }
181  }
182 
183  EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
184  {
185  Packet8i p;
186  if (sizeof(decltype(rng())) == 8)
187  {
188 #ifdef EIGEN_VECTORIZE_AVX2
189  p = _mm256_setr_epi64x(rng(), rng(), rng(), 0);
190  p = _mm256_permutevar8x32_epi32(p, _mm256_setr_epi32(0, 1, 2, 7, 3, 4, 5, 7));
191  p = _mm256_shuffle_epi8(p, _mm256_setr_epi8(
192  0, 1, 2, 3,
193  4, 5, 6, 7,
194  8, 9, 10, 11,
195  3, 7, 11, 11,
196  0, 1, 2, 3,
197  4, 5, 6, 7,
198  8, 9, 10, 11,
199  3, 7, 11, 11
200  ));
201 
202 #else
203  auto v = rng();
204  p = _mm256_setr_epi64x(rng(), v, rng(), v >> 32);
205  Packet4i p1, p2, o = _mm_setr_epi8(
206  0, 1, 2, 3,
207  4, 5, 6, 7,
208  8, 9, 10, 11,
209  3, 7, 11, 11);
210  split_two(p, p1, p2);
211  p = combine_two(_mm_shuffle_epi8(p1, o), _mm_shuffle_epi8(p2, o));
212 #endif
213  }
214  else
215  {
216  p = _mm256_setr_epi32(rng(), rng(), rng(), 0, rng(), rng(), rng(), 0);
217 #ifdef EIGEN_VECTORIZE_AVX2
218  p = _mm256_shuffle_epi8(p, _mm256_setr_epi8(
219  0, 1, 2, 3,
220  4, 5, 6, 7,
221  8, 9, 10, 11,
222  3, 7, 11, 11,
223  0, 1, 2, 3,
224  4, 5, 6, 7,
225  8, 9, 10, 11,
226  3, 7, 11, 11
227  ));
228 #else
229  Packet4i p1, p2, o = _mm_setr_epi8(
230  0, 1, 2, 3,
231  4, 5, 6, 7,
232  8, 9, 10, 11,
233  3, 7, 11, 11);
234  split_two(p, p1, p2);
235  p = combine_two(_mm_shuffle_epi8(p1, o), _mm_shuffle_epi8(p2, o));
236 #endif
237  }
238  return p;
239  }
240 
241  EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
242  {
243  if (sizeof(decltype(rng())) == 8)
244  {
245  return _mm_set_epi64x(rng(), rng());
246  }
247  else
248  {
249  return _mm_set_epi32(rng(), rng(), rng(), rng());
250  }
251  }
252  };
253 
254  template<typename Rng>
255  struct RawbitsMaker<Packet8i, Rng, Packet8i, Rand::RandomEngineType::packet>
256  {
257  EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
258  {
259  return rng();
260  }
261 
262  EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
263  {
264  return rng();
265  }
266 
267  EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
268  {
269  return rng.half();
270  }
271  };
272 
273 #ifndef EIGEN_VECTORIZE_AVX2
274  template<>
275  EIGEN_STRONG_INLINE Packet8f bit_to_ur_float<Packet8i>(const Packet8i& x)
276  {
277  const Packet4i lower = pset1<Packet4i>(0x7FFFFF),
278  upper = pset1<Packet4i>(127 << 23);
279  const Packet8f one = pset1<Packet8f>(1);
280 
281  Packet4i x1, x2;
282  split_two(x, x1, x2);
283 
284  return psub(reinterpret_to_float(
285  combine_two(por(pand(x1, lower), upper), por(pand(x2, lower), upper)
286  )), one);
287  }
288 
289  template<>
290  EIGEN_STRONG_INLINE Packet4d bit_to_ur_double<Packet8i>(const Packet8i& x)
291  {
292  const Packet4i lower = pseti64<Packet4i>(0xFFFFFFFFFFFFFull),
293  upper = pseti64<Packet4i>(1023ull << 52);
294  const Packet4d one = pset1<Packet4d>(1);
295 
296  Packet4i x1, x2;
297  split_two(x, x1, x2);
298 
299  return psub(reinterpret_to_double(
300  combine_two(por(pand(x1, lower), upper), por(pand(x2, lower), upper)
301  )), one);
302  }
303 #endif
304 
305  template<typename Rng>
306  struct UniformRealUtils<Packet8f, Rng> : public RawbitsMaker<Packet8i, Rng>
307  {
308  EIGEN_STRONG_INLINE Packet8f zero_to_one(Rng& rng)
309  {
310  return pdiv(_mm256_cvtepi32_ps(pand(this->rawbits(rng), pset1<Packet8i>(0x7FFFFFFF))),
311  pset1<Packet8f>(0x7FFFFFFF));
312  }
313 
314  EIGEN_STRONG_INLINE Packet8f uniform_real(Rng& rng)
315  {
316  return bit_to_ur_float(this->rawbits_34(rng));
317  }
318  };
319 
320  template<typename Rng>
321  struct UniformRealUtils<Packet4d, Rng> : public RawbitsMaker<Packet8i, Rng>
322  {
323  EIGEN_STRONG_INLINE Packet4d zero_to_one(Rng& rng)
324  {
325  return pdiv(_mm256_cvtepi32_pd(pand(this->rawbits_half(rng), pset1<Packet4i>(0x7FFFFFFF))),
326  pset1<Packet4d>(0x7FFFFFFF));
327  }
328 
329  EIGEN_STRONG_INLINE Packet4d uniform_real(Rng& rng)
330  {
331  return bit_to_ur_double(this->rawbits(rng));
332  }
333  };
334  }
335 }
336 #endif
337 
338 #ifdef EIGEN_VECTORIZE_SSE2
339 #include <xmmintrin.h>
340 
341 namespace Eigen
342 {
343  namespace internal
344  {
345  template<typename Rng, typename RngResult>
346  struct RawbitsMaker<Packet4i, Rng, RngResult, Rand::RandomEngineType::scalar>
347  {
348  EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
349  {
350  if (sizeof(RngResult) == 8)
351  {
352  return _mm_set_epi64x(rng(), rng());
353  }
354  else
355  {
356  return _mm_set_epi32(rng(), rng(), rng(), rng());
357  }
358  }
359 
360  EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
361  {
362  if (sizeof(RngResult) == 8)
363  {
364  return _mm_set_epi64x(rng(), rng());
365  }
366  else
367  {
368 #ifdef EIGEN_VECTORIZE_SSSE3
369  Packet4i p = _mm_setr_epi32(rng(), rng(), rng(), 0);
370  return _mm_shuffle_epi8(p, _mm_setr_epi8(
371  0, 1, 2, 3,
372  4, 5, 6, 7,
373  8, 9, 10, 11,
374  3, 7, 11, 11));
375 #else
376  return _mm_set_epi32(rng(), rng(), rng(), rng());
377 #endif
378  }
379  }
380 
381  EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
382  {
383  if (sizeof(decltype(rng())) == 8)
384  {
385  return _mm_set_epi64x(0, rng());
386  }
387  else
388  {
389  return _mm_setr_epi32(rng(), rng(), 0, 0);
390  }
391  }
392  };
393 
394  template<typename Rng>
395  struct RawbitsMaker<Packet4i, Rng, Packet4i, Rand::RandomEngineType::packet>
396  {
397  EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
398  {
399  return rng();
400  }
401 
402  EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
403  {
404  return rng();
405  }
406 
407  EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
408  {
409  return rng();
410  }
411  };
412 
413  template<typename Rng>
414  struct UniformRealUtils<Packet4f, Rng> : public RawbitsMaker<Packet4i, Rng>
415  {
416  EIGEN_STRONG_INLINE Packet4f zero_to_one(Rng& rng)
417  {
418  return pdiv(_mm_cvtepi32_ps(pand(this->rawbits(rng), pset1<Packet4i>(0x7FFFFFFF))),
419  pset1<Packet4f>(0x7FFFFFFF));
420  }
421 
422  EIGEN_STRONG_INLINE Packet4f uniform_real(Rng& rng)
423  {
424  return bit_to_ur_float(this->rawbits_34(rng));
425  }
426  };
427 
428  template<typename Rng>
429  struct UniformRealUtils<Packet2d, Rng> : public RawbitsMaker<Packet4i, Rng>
430  {
431  EIGEN_STRONG_INLINE Packet2d zero_to_one(Rng& rng)
432  {
433  return pdiv(_mm_cvtepi32_pd(pand(this->rawbits_half(rng), pset1<Packet4i>(0x7FFFFFFF))),
434  pset1<Packet2d>(0x7FFFFFFF));
435  }
436 
437  EIGEN_STRONG_INLINE Packet2d uniform_real(Rng& rng)
438  {
439  return bit_to_ur_double(this->rawbits(rng));
440  }
441  };
442  }
443 }
444 #endif
445 
446 namespace Eigen
447 {
448  namespace internal
449  {
450  template<typename Scalar, typename Rng>
451  struct scalar_base_rng
452  {
453  static_assert(
454  Rand::IsScalarRandomEngine<
455  typename std::remove_reference<Rng>::type
456  >::value ||
457  Rand::IsPacketRandomEngine<
458  typename std::remove_reference<Rng>::type
459  >::value, "Rng must satisfy RandomNumberEngine");
460 
461  Rng rng;
462 
463  scalar_base_rng(const Rng& _rng) : rng{ _rng }
464  {
465  }
466 
467  scalar_base_rng(const scalar_base_rng& o)
468  : rng{ o.rng }
469  {
470  }
471 
472  scalar_base_rng(scalar_base_rng&& o)
473  : rng{ std::move(o.rng) }
474  {
475  }
476  };
477  }
478 }
479 
480 #endif
PacketRandomEngine.h
Eigen::Rand::balanced
const BalancedType< Derived, Urng > balanced(Index rows, Index cols, Urng &&urng)
generates reals in a range [-1, 1]
Definition: Core.h:126
MorePacketMath.h
PacketFilter.h