EigenRand  0.4.0-alpha
arch/AVX/RandUtils.h
Go to the documentation of this file.
1 
12 #ifndef EIGENRAND_RAND_UTILS_AVX_H
13 #define EIGENRAND_RAND_UTILS_AVX_H
14 
15 #include <immintrin.h>
16 
17 namespace Eigen
18 {
19  namespace internal
20  {
21  template<typename Rng>
22  struct RawbitsMaker<Packet4i, Rng, Packet8i, Rand::RandomEngineType::packet>
23  {
24  EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
25  {
26  return rng.half();
27  }
28 
29  EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
30  {
31  return rng.half();
32  }
33 
34  EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
35  {
36  return rng.half();
37  }
38  };
39 
40  template<typename Rng>
41  struct RawbitsMaker<Packet8i, Rng, Packet4i, Rand::RandomEngineType::packet>
42  {
43  EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
44  {
45  return _mm256_insertf128_si256(_mm256_castsi128_si256(rng()), rng(), 1);
46  }
47 
48  EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
49  {
50  return _mm256_insertf128_si256(_mm256_castsi128_si256(rng()), rng(), 1);
51  }
52 
53  EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
54  {
55  return rng();
56  }
57  };
58 
59  template<typename Rng, typename RngResult>
60  struct RawbitsMaker<Packet8i, Rng, RngResult, Rand::RandomEngineType::scalar>
61  {
62  EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
63  {
64  if (sizeof(decltype(rng())) == 8)
65  {
66  return _mm256_set_epi64x(rng(), rng(), rng(), rng());
67  }
68  else
69  {
70  return _mm256_set_epi32(rng(), rng(), rng(), rng(),
71  rng(), rng(), rng(), rng());
72  }
73  }
74 
75  EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
76  {
77  Packet8i p;
78  if (sizeof(decltype(rng())) == 8)
79  {
80 #ifdef EIGEN_VECTORIZE_AVX2
81  p = _mm256_setr_epi64x(rng(), rng(), rng(), 0);
82  p = _mm256_permutevar8x32_epi32(p, _mm256_setr_epi32(0, 1, 2, 7, 3, 4, 5, 7));
83  p = _mm256_shuffle_epi8(p, _mm256_setr_epi8(
84  0, 1, 2, 3,
85  4, 5, 6, 7,
86  8, 9, 10, 11,
87  3, 7, 11, 11,
88  0, 1, 2, 3,
89  4, 5, 6, 7,
90  8, 9, 10, 11,
91  3, 7, 11, 11
92  ));
93 
94 #else
95  auto v = rng();
96  p = _mm256_setr_epi64x(rng(), v, rng(), v >> 32);
97  Packet4i p1, p2, o = _mm_setr_epi8(
98  0, 1, 2, 3,
99  4, 5, 6, 7,
100  8, 9, 10, 11,
101  3, 7, 11, 11);
102  split_two(p, p1, p2);
103  p = combine_two(_mm_shuffle_epi8(p1, o), _mm_shuffle_epi8(p2, o));
104 #endif
105  }
106  else
107  {
108  p = _mm256_setr_epi32(rng(), rng(), rng(), 0, rng(), rng(), rng(), 0);
109 #ifdef EIGEN_VECTORIZE_AVX2
110  p = _mm256_shuffle_epi8(p, _mm256_setr_epi8(
111  0, 1, 2, 3,
112  4, 5, 6, 7,
113  8, 9, 10, 11,
114  3, 7, 11, 11,
115  0, 1, 2, 3,
116  4, 5, 6, 7,
117  8, 9, 10, 11,
118  3, 7, 11, 11
119  ));
120 #else
121  Packet4i p1, p2, o = _mm_setr_epi8(
122  0, 1, 2, 3,
123  4, 5, 6, 7,
124  8, 9, 10, 11,
125  3, 7, 11, 11);
126  split_two(p, p1, p2);
127  p = combine_two(_mm_shuffle_epi8(p1, o), _mm_shuffle_epi8(p2, o));
128 #endif
129  }
130  return p;
131  }
132 
133  EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
134  {
135  if (sizeof(decltype(rng())) == 8)
136  {
137  return _mm_set_epi64x(rng(), rng());
138  }
139  else
140  {
141  return _mm_set_epi32(rng(), rng(), rng(), rng());
142  }
143  }
144  };
145 
146  template<typename Rng>
147  struct RawbitsMaker<Packet8i, Rng, Packet8i, Rand::RandomEngineType::packet>
148  {
149  EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
150  {
151  return rng();
152  }
153 
154  EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
155  {
156  return rng();
157  }
158 
159  EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
160  {
161  return rng.half();
162  }
163  };
164 
165 #ifndef EIGEN_VECTORIZE_AVX2
166  template<>
167  EIGEN_STRONG_INLINE Packet8f bit_to_ur_float<Packet8i>(const Packet8i& x)
168  {
169  const Packet4i lower = pset1<Packet4i>(0x7FFFFF),
170  upper = pset1<Packet4i>(127 << 23);
171  const Packet8f one = pset1<Packet8f>(1);
172 
173  Packet4i x1, x2;
174  split_two(x, x1, x2);
175 
176  return psub(reinterpret_to_float(
177  combine_two(por(pand(x1, lower), upper), por(pand(x2, lower), upper)
178  )), one);
179  }
180 
181  template<>
182  EIGEN_STRONG_INLINE Packet4d bit_to_ur_double<Packet8i>(const Packet8i& x)
183  {
184  const Packet4i lower = pseti64<Packet4i>(0xFFFFFFFFFFFFFull),
185  upper = pseti64<Packet4i>(1023ull << 52);
186  const Packet4d one = pset1<Packet4d>(1);
187 
188  Packet4i x1, x2;
189  split_two(x, x1, x2);
190 
191  return psub(reinterpret_to_double(
192  combine_two(por(pand(x1, lower), upper), por(pand(x2, lower), upper)
193  )), one);
194  }
195 #endif
196 
197  template<typename Rng>
198  struct UniformRealUtils<Packet8f, Rng> : public RawbitsMaker<Packet8i, Rng>
199  {
200  EIGEN_STRONG_INLINE Packet8f zero_to_one(Rng& rng)
201  {
202  return pdiv(_mm256_cvtepi32_ps(pand(this->rawbits(rng), pset1<Packet8i>(0x7FFFFFFF))),
203  pset1<Packet8f>(0x7FFFFFFF));
204  }
205 
206  EIGEN_STRONG_INLINE Packet8f uniform_real(Rng& rng)
207  {
208  return bit_to_ur_float(this->rawbits_34(rng));
209  }
210  };
211 
212  template<typename Rng>
213  struct UniformRealUtils<Packet4d, Rng> : public RawbitsMaker<Packet8i, Rng>
214  {
215  EIGEN_STRONG_INLINE Packet4d zero_to_one(Rng& rng)
216  {
217  return pdiv(_mm256_cvtepi32_pd(pand(this->rawbits_half(rng), pset1<Packet4i>(0x7FFFFFFF))),
218  pset1<Packet4d>(0x7FFFFFFF));
219  }
220 
221  EIGEN_STRONG_INLINE Packet4d uniform_real(Rng& rng)
222  {
223  return bit_to_ur_double(this->rawbits(rng));
224  }
225  };
226  }
227 }
228 #endif