EigenRand  0.5.0
 
Loading...
Searching...
No Matches
arch/AVX/RandUtils.h
Go to the documentation of this file.
1
12#ifndef EIGENRAND_RAND_UTILS_AVX_H
13#define EIGENRAND_RAND_UTILS_AVX_H
14
15#include <immintrin.h>
16
17namespace Eigen
18{
19 namespace internal
20 {
21 template<typename Rng>
22 struct RawbitsMaker<Packet4i, Rng, Packet8i, Rand::RandomEngineType::packet>
23 {
24 EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
25 {
26 return rng.half();
27 }
28
29 EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
30 {
31 return rng.half();
32 }
33
34 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
35 {
36 return rng.half();
37 }
38 };
39
40 template<typename Rng>
41 struct RawbitsMaker<Packet8i, Rng, Packet4i, Rand::RandomEngineType::packet>
42 {
43 EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
44 {
45 return _mm256_insertf128_si256(_mm256_castsi128_si256(rng()), rng(), 1);
46 }
47
48 EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
49 {
50 return _mm256_insertf128_si256(_mm256_castsi128_si256(rng()), rng(), 1);
51 }
52
53 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
54 {
55 return rng();
56 }
57 };
58
59 template<typename Rng, typename RngResult>
60 struct RawbitsMaker<Packet8i, Rng, RngResult, Rand::RandomEngineType::scalar_fullbit>
61 {
62 EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
63 {
64 if (sizeof(decltype(rng())) == 8)
65 {
66 return _mm256_set_epi64x(rng(), rng(), rng(), rng());
67 }
68 else
69 {
70 return _mm256_set_epi32(rng(), rng(), rng(), rng(),
71 rng(), rng(), rng(), rng());
72 }
73 }
74
75 EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
76 {
77 Packet8i p;
78 if (sizeof(decltype(rng())) == 8)
79 {
80#ifdef EIGEN_VECTORIZE_AVX2
81 p = _mm256_setr_epi64x(rng(), rng(), rng(), 0);
82 p = _mm256_permutevar8x32_epi32(p, _mm256_setr_epi32(0, 1, 2, 7, 3, 4, 5, 7));
83 p = _mm256_shuffle_epi8(p, _mm256_setr_epi8(
84 0, 1, 2, 3,
85 4, 5, 6, 7,
86 8, 9, 10, 11,
87 3, 7, 11, 11,
88 0, 1, 2, 3,
89 4, 5, 6, 7,
90 8, 9, 10, 11,
91 3, 7, 11, 11
92 ));
93
94#else
95 auto v = rng();
96 p = _mm256_setr_epi64x(rng(), v, rng(), v >> 32);
97 Packet4i p1, p2, o = _mm_setr_epi8(
98 0, 1, 2, 3,
99 4, 5, 6, 7,
100 8, 9, 10, 11,
101 3, 7, 11, 11);
102 split_two(p, p1, p2);
103 p = combine_two(_mm_shuffle_epi8(p1, o), _mm_shuffle_epi8(p2, o));
104#endif
105 }
106 else
107 {
108 p = _mm256_setr_epi32(rng(), rng(), rng(), 0, rng(), rng(), rng(), 0);
109#ifdef EIGEN_VECTORIZE_AVX2
110 p = _mm256_shuffle_epi8(p, _mm256_setr_epi8(
111 0, 1, 2, 3,
112 4, 5, 6, 7,
113 8, 9, 10, 11,
114 3, 7, 11, 11,
115 0, 1, 2, 3,
116 4, 5, 6, 7,
117 8, 9, 10, 11,
118 3, 7, 11, 11
119 ));
120#else
121 Packet4i p1, p2, o = _mm_setr_epi8(
122 0, 1, 2, 3,
123 4, 5, 6, 7,
124 8, 9, 10, 11,
125 3, 7, 11, 11);
126 split_two(p, p1, p2);
127 p = combine_two(_mm_shuffle_epi8(p1, o), _mm_shuffle_epi8(p2, o));
128#endif
129 }
130 return p;
131 }
132
133 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
134 {
135 if (sizeof(decltype(rng())) == 8)
136 {
137 return _mm_set_epi64x(rng(), rng());
138 }
139 else
140 {
141 return _mm_set_epi32(rng(), rng(), rng(), rng());
142 }
143 }
144 };
145
146 template<typename Rng>
147 struct RawbitsMaker<Packet8i, Rng, Packet8i, Rand::RandomEngineType::packet>
148 {
149 EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
150 {
151 return rng();
152 }
153
154 EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
155 {
156 return rng();
157 }
158
159 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
160 {
161 return rng.half();
162 }
163 };
164
165#ifndef EIGEN_VECTORIZE_AVX2
166 template<>
167 EIGEN_STRONG_INLINE Packet8f bit_to_ur_float<Packet8i>(const Packet8i& x)
168 {
169 const Packet4i lower = pset1<Packet4i>(0x7FFFFF),
170 upper = pset1<Packet4i>(127 << 23);
171 const Packet8f one = pset1<Packet8f>(1);
172
173 Packet4i x1, x2;
174 split_two(x, x1, x2);
175
176 return psub(reinterpret_to_float(
177 combine_two(por(pand(x1, lower), upper), por(pand(x2, lower), upper)
178 )), one);
179 }
180
181 template<>
182 EIGEN_STRONG_INLINE Packet4d bit_to_ur_double<Packet8i>(const Packet8i& x)
183 {
184 const Packet4i lower = pseti64<Packet4i>(0xFFFFFFFFFFFFFull),
185 upper = pseti64<Packet4i>(1023ull << 52);
186 const Packet4d one = pset1<Packet4d>(1);
187
188 Packet4i x1, x2;
189 split_two(x, x1, x2);
190
191 return psub(reinterpret_to_double(
192 combine_two(por(pand(x1, lower), upper), por(pand(x2, lower), upper)
193 )), one);
194 }
195#endif
196
197 template<typename Rng>
198 struct UniformRealUtils<Packet8f, Rng> : public RawbitsMaker<Packet8i, Rng>
199 {
200 EIGEN_STRONG_INLINE Packet8f zero_to_one(Rng& rng)
201 {
202 return pdiv(_mm256_cvtepi32_ps(pand(this->rawbits(rng), pset1<Packet8i>(0x7FFFFFFF))),
203 pset1<Packet8f>(0x7FFFFFFF));
204 }
205
206 EIGEN_STRONG_INLINE Packet8f uniform_real(Rng& rng)
207 {
208 return bit_to_ur_float(this->rawbits_34(rng));
209 }
210 };
211
212 template<typename Rng>
213 struct UniformRealUtils<Packet4d, Rng> : public RawbitsMaker<Packet8i, Rng>
214 {
215 EIGEN_STRONG_INLINE Packet4d zero_to_one(Rng& rng)
216 {
217 return pdiv(_mm256_cvtepi32_pd(pand(this->rawbits_half(rng), pset1<Packet4i>(0x7FFFFFFF))),
218 pset1<Packet4d>(0x7FFFFFFF));
219 }
220
221 EIGEN_STRONG_INLINE Packet4d uniform_real(Rng& rng)
222 {
223 return bit_to_ur_double(this->rawbits(rng));
224 }
225 };
226 }
227}
228#endif