12#ifndef EIGENRAND_RAND_UTILS_AVX_H
13#define EIGENRAND_RAND_UTILS_AVX_H
// Partial specialization of RawbitsMaker producing Packet4i values from an
// engine tagged RandomEngineType::packet. The third template argument is
// Packet8i here (presumably the engine's native result type, as the
// `RngResult` parameter name of a sibling specialization suggests — confirm).
// Only the member signatures are visible in this view; bodies are not shown.
21 template<
typename Rng>
22 struct RawbitsMaker<Packet4i, Rng, Packet8i, Rand::RandomEngineType::packet>
// Returns a Packet4i built from `rng` (body not visible here).
24 EIGEN_STRONG_INLINE Packet4i rawbits(Rng& rng)
// Packet4i variant named "_34"; presumably supplies only the bits needed per
// lane for float conversion — body not visible, confirm against the full file.
29 EIGEN_STRONG_INLINE Packet4i rawbits_34(Rng& rng)
// Returns a Packet4i "half" result (body not visible here).
34 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
// Partial specialization of RawbitsMaker producing Packet8i values from an
// engine tagged RandomEngineType::packet, with Packet4i as the third template
// argument. The visible bodies call rng() twice and glue the two results
// into one 256-bit value.
40 template<
typename Rng>
41 struct RawbitsMaker<Packet8i, Rng, Packet4i, Rand::RandomEngineType::packet>
// Builds a Packet8i from two rng() results: one is widened with
// _mm256_castsi128_si256 (low 128 bits), the other is inserted at lane
// index 1 (high 128 bits).
43 EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
// NOTE(review): both rng() calls are arguments of the same call expression,
// so C++ does not fix which one runs first; the low/high lane assignment of
// consecutive engine outputs may differ between compilers.
45 return _mm256_insertf128_si256(_mm256_castsi128_si256(rng()), rng(), 1);
// Same expression as rawbits() in this specialization.
48 EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
50 return _mm256_insertf128_si256(_mm256_castsi128_si256(rng()), rng(), 1);
// Returns a Packet4i (body not visible here).
53 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
// Partial specialization of RawbitsMaker producing Packet8i values from an
// engine tagged RandomEngineType::scalar_fullbit. Every member switches on
// sizeof(decltype(rng())) == 8 to choose between packing 64-bit and 32-bit
// rng() results. Several lines of the original (braces, else/#else/#endif,
// shuffle-mask literals) are not visible in this view.
//
// NOTE(review): wherever several rng() calls appear as arguments of a single
// _mm*_set* call, their relative evaluation order is unspecified in C++, so
// the element order of consecutive engine outputs is compiler-dependent.
59 template<
typename Rng,
typename RngResult>
60 struct RawbitsMaker<Packet8i, Rng, RngResult, Rand::RandomEngineType::scalar_fullbit>
// Fills all 256 bits with engine output.
62 EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
// 8-byte engine result: four calls cover the 256-bit packet.
64 if (
sizeof(
decltype(rng())) == 8)
66 return _mm256_set_epi64x(rng(), rng(), rng(), rng());
// Eight calls, one per 32-bit element (presumably the else branch; the
// `else` line itself is not visible here).
70 return _mm256_set_epi32(rng(), rng(), rng(), rng(),
71 rng(), rng(), rng(), rng());
// Variant that draws fewer bits than rawbits(): the visible paths load three
// 64-bit values (or six 32-bit values) plus zero padding, then redistribute
// bytes with permute/shuffle operations. The shuffle masks are not visible
// in this view, so the exact byte layout cannot be stated from here.
75 EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
// 8-byte engine result path.
78 if (
sizeof(
decltype(rng())) == 8)
80#ifdef EIGEN_VECTORIZE_AVX2
// Three 64-bit draws in elements 0..2, element 3 zeroed.
81 p = _mm256_setr_epi64x(rng(), rng(), rng(), 0);
// Cross-lane 32-bit permute: dwords 0,1,2 go to the low 128-bit lane and
// dwords 3,4,5 to the high lane; index 7 (a zero dword) pads each lane.
82 p = _mm256_permutevar8x32_epi32(p, _mm256_setr_epi32(0, 1, 2, 7, 3, 4, 5, 7));
// Per-lane byte shuffle; mask literal not visible here.
83 p = _mm256_shuffle_epi8(p, _mm256_setr_epi8(
// Presumably the non-AVX2 branch (the `#else` line is not visible): `v` is
// defined on a line not shown; its low and high 32 bits (v and v >> 32)
// are placed in elements 1 and 3 next to two further rng() draws.
96 p = _mm256_setr_epi64x(rng(), v, rng(), v >> 32);
// 128-bit shuffle mask `o` (literal not visible) applied to each half.
97 Packet4i p1, p2, o = _mm_setr_epi8(
// Split into two 128-bit halves, shuffle each with SSE, then recombine.
102 split_two(p, p1, p2);
103 p = combine_two(_mm_shuffle_epi8(p1, o), _mm_shuffle_epi8(p2, o));
// 32-bit engine result path (presumably the else branch): three draws plus
// a zero dword in each 128-bit lane.
108 p = _mm256_setr_epi32(rng(), rng(), rng(), 0, rng(), rng(), rng(), 0);
109#ifdef EIGEN_VECTORIZE_AVX2
// Per-lane byte shuffle; mask literal not visible here.
110 p = _mm256_shuffle_epi8(p, _mm256_setr_epi8(
// Presumably the non-AVX2 branch: same split / shuffle / combine scheme as
// above, using a 128-bit mask `o` whose literal is not visible.
121 Packet4i p1, p2, o = _mm_setr_epi8(
126 split_two(p, p1, p2);
127 p = combine_two(_mm_shuffle_epi8(p1, o), _mm_shuffle_epi8(p2, o));
// Fills a 128-bit Packet4i with engine output.
133 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
// 8-byte engine result: two calls cover the 128-bit packet.
135 if (
sizeof(
decltype(rng())) == 8)
137 return _mm_set_epi64x(rng(), rng());
// Four calls, one per 32-bit element (presumably the else branch).
141 return _mm_set_epi32(rng(), rng(), rng(), rng());
// Partial specialization of RawbitsMaker producing Packet8i values from an
// engine tagged RandomEngineType::packet, with Packet8i as the third template
// argument (presumably the engine already yields Packet8i — confirm).
// Only the member signatures are visible in this view; bodies are not shown.
146 template<
typename Rng>
147 struct RawbitsMaker<Packet8i, Rng, Packet8i, Rand::RandomEngineType::packet>
// Returns a Packet8i built from `rng` (body not visible here).
149 EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
// "_34" variant returning a Packet8i (body not visible here).
154 EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng)
// Returns a Packet4i "half" result (body not visible here).
159 EIGEN_STRONG_INLINE Packet4i rawbits_half(Rng& rng)
165#ifndef EIGEN_VECTORIZE_AVX2
// Converts eight 32-bit integer bit patterns into eight floats by forcing
// the exponent field and keeping the low 23 bits, working on two 128-bit
// halves. It follows an `#ifndef EIGEN_VECTORIZE_AVX2` line, so this is
// presumably the fallback used when 256-bit integer ops are unavailable.
// The tail of the psub(...) call is not visible in this view; presumably
// `one` is subtracted, giving values in [0, 1) — confirm.
167 EIGEN_STRONG_INLINE Packet8f bit_to_ur_float<Packet8i>(
const Packet8i& x)
// lower: mask of the low 23 bits (the float mantissa width).
// upper: 127 << 23, the exponent bits of 1.0f.
169 const Packet4i lower = pset1<Packet4i>(0x7FFFFF),
170 upper = pset1<Packet4i>(127 << 23);
171 const Packet8f one = pset1<Packet8f>(1);
// x1/x2 are declared on a line not visible here; they receive the two
// 128-bit halves of x.
174 split_two(x, x1, x2);
// (bits & lower) | upper gives the bit pattern of a float in [1, 2).
176 return psub(reinterpret_to_float(
177 combine_two(por(pand(x1, lower), upper), por(pand(x2, lower), upper)
// Converts four 64-bit integer bit patterns (held in a Packet8i) into four
// doubles by forcing the exponent field and keeping the low 52 bits, working
// on two 128-bit halves. The tail of the psub(...) call is not visible in
// this view; presumably `one` is subtracted, giving values in [0, 1) —
// confirm.
182 EIGEN_STRONG_INLINE Packet4d bit_to_ur_double<Packet8i>(
const Packet8i& x)
// lower: mask of the low 52 bits (the double mantissa width).
// upper: 1023 << 52, the exponent bits of 1.0.
184 const Packet4i lower = pseti64<Packet4i>(0xFFFFFFFFFFFFFull),
185 upper = pseti64<Packet4i>(1023ull << 52);
186 const Packet4d one = pset1<Packet4d>(1);
// x1/x2 are declared on a line not visible here; they receive the two
// 128-bit halves of x.
189 split_two(x, x1, x2);
// (bits & lower) | upper gives the bit pattern of a double in [1, 2).
191 return psub(reinterpret_to_double(
192 combine_two(por(pand(x1, lower), upper), por(pand(x2, lower), upper)
// Uniform float helpers for 8-wide float packets, layered on the Packet8i
// raw-bit generator it inherits from.
197 template<
typename Rng>
198 struct UniformRealUtils<Packet8f, Rng> :
public RawbitsMaker<Packet8i, Rng>
// Clears the sign bit of each 32-bit lane, converts the non-negative
// integers to float, and divides by 0x7FFFFFFF.
// NOTE(review): 0x7FFFFFFF is not exactly representable as a float, so
// large inputs can round up and the quotient may reach exactly 1 — confirm
// whether callers expect a closed or a half-open range.
200 EIGEN_STRONG_INLINE Packet8f zero_to_one(Rng& rng)
202 return pdiv(_mm256_cvtepi32_ps(pand(this->rawbits(rng), pset1<Packet8i>(0x7FFFFFFF))),
203 pset1<Packet8f>(0x7FFFFFFF));
// Feeds the reduced-bit rawbits_34() output through bit_to_ur_float().
206 EIGEN_STRONG_INLINE Packet8f uniform_real(Rng& rng)
208 return bit_to_ur_float(this->rawbits_34(rng));
// Uniform double helpers for 4-wide double packets, layered on the Packet8i
// raw-bit generator it inherits from.
212 template<
typename Rng>
213 struct UniformRealUtils<Packet4d, Rng> :
public RawbitsMaker<Packet8i, Rng>
// Takes four 32-bit lanes from rawbits_half(), clears each sign bit,
// converts them to double, and divides by 0x7FFFFFFF. Only 31 random bits
// feed each double on this path.
215 EIGEN_STRONG_INLINE Packet4d zero_to_one(Rng& rng)
217 return pdiv(_mm256_cvtepi32_pd(pand(this->rawbits_half(rng), pset1<Packet4i>(0x7FFFFFFF))),
218 pset1<Packet4d>(0x7FFFFFFF));
// Feeds a full 256-bit rawbits() draw (four 64-bit patterns) through
// bit_to_ur_double().
221 EIGEN_STRONG_INLINE Packet4d uniform_real(Rng& rng)
223 return bit_to_ur_double(this->rawbits(rng));