EigenRand  0.5.0
 
Loading...
Searching...
No Matches
arch/AVX512/MorePacketMath.h
Go to the documentation of this file.
1
12#ifndef EIGENRAND_MORE_PACKET_MATH_AVX512_H
13#define EIGENRAND_MORE_PACKET_MATH_AVX512_H
14
15#include <immintrin.h>
16
17namespace Eigen
18{
19 namespace internal
20 {
21 template<>
22 struct IsIntPacket<Packet16i> : std::true_type {};
23
24 template<>
25 struct HalfPacket<Packet16i>
26 {
27 using type = Packet8i;
28 };
29
30 template<>
31 struct HalfPacket<Packet16f>
32 {
33 using type = Packet8f;
34 };
35
36 template<>
37 struct IsFloatPacket<Packet16f> : std::true_type {};
38
39 template<>
40 struct IsDoublePacket<Packet8d> : std::true_type {};
41
42 template<>
43 struct reinterpreter<Packet16i>
44 {
45 EIGEN_STRONG_INLINE Packet16f to_float(const Packet16i& x)
46 {
47 return _mm512_castsi512_ps(x);
48 }
49
50 EIGEN_STRONG_INLINE Packet8d to_double(const Packet16i& x)
51 {
52 return _mm512_castsi512_pd(x);
53 }
54
55 EIGEN_STRONG_INLINE Packet16i to_int(const Packet16i& x)
56 {
57 return x;
58 }
59 };
60
61 template<>
62 struct reinterpreter<Packet16f>
63 {
64 EIGEN_STRONG_INLINE Packet16f to_float(const Packet16f& x)
65 {
66 return x;
67 }
68
69 EIGEN_STRONG_INLINE Packet8d to_double(const Packet16f& x)
70 {
71 return _mm512_castps_pd(x);
72 }
73
74 EIGEN_STRONG_INLINE Packet16i to_int(const Packet16f& x)
75 {
76 return _mm512_castps_si512(x);
77 }
78 };
79
80 template<>
81 struct reinterpreter<Packet8d>
82 {
83 EIGEN_STRONG_INLINE Packet16f to_float(const Packet8d& x)
84 {
85 return _mm512_castpd_ps(x);
86 }
87
88 EIGEN_STRONG_INLINE Packet8d to_double(const Packet8d& x)
89 {
90 return x;
91 }
92
93 EIGEN_STRONG_INLINE Packet16i to_int(const Packet8d& x)
94 {
95 return _mm512_castpd_si512(x);
96 }
97 };
98
99 template<>
100 EIGEN_STRONG_INLINE Packet16i pseti64<Packet16i>(uint64_t a)
101 {
102 return _mm512_set1_epi64(a);
103 }
104
105 template<>
106 EIGEN_STRONG_INLINE Packet16i padd64<Packet16i>(const Packet16i& a, const Packet16i& b)
107 {
108 return _mm512_add_epi64(a, b);
109 }
110
111 template<>
112 EIGEN_STRONG_INLINE Packet16i psub64<Packet16i>(const Packet16i& a, const Packet16i& b)
113 {
114 return _mm512_sub_epi64(a, b);
115 }
116
117 template<>
118 EIGEN_STRONG_INLINE Packet16i pcmpeq<Packet16i>(const Packet16i& a, const Packet16i& b)
119 {
120 return pcmp_eq(a, b);
121 }
122
123 template<>
124 EIGEN_STRONG_INLINE Packet16f pcmpeq<Packet16f>(const Packet16f& a, const Packet16f& b)
125 {
126 return pcmp_eq(a, b);
127 }
128
129 template<>
130 EIGEN_STRONG_INLINE Packet16i pnegate<Packet16i>(const Packet16i& a)
131 {
132 return _mm512_sub_epi32(pset1<Packet16i>(0), a);
133 }
134
135 template<>
136 struct BitShifter<Packet16i>
137 {
138 template<int b>
139 EIGEN_STRONG_INLINE Packet16i sll(const Packet16i& a)
140 {
141 return _mm512_slli_epi32(a, b);
142 }
143
144 template<int b>
145 EIGEN_STRONG_INLINE Packet16i srl(const Packet16i& a, int _b = b)
146 {
147 if (b >= 0)
148 {
149 return _mm512_srli_epi32(a, b);
150 }
151 else
152 {
153 return _mm512_srli_epi32(a, _b);
154 }
155 }
156
157 template<int b>
158 EIGEN_STRONG_INLINE Packet16i sll64(const Packet16i& a)
159 {
160 return _mm512_slli_epi64(a, b);
161 }
162
163 template<int b>
164 EIGEN_STRONG_INLINE Packet16i srl64(const Packet16i& a)
165 {
166 return _mm512_srli_epi64(a, b);
167 }
168 };
169
170 template<> EIGEN_STRONG_INLINE bool predux_all(const Packet16i& x)
171 {
172 return _mm512_movepi32_mask(x) == 0xFFFF;
173 }
174
175 template<> EIGEN_STRONG_INLINE bool predux_all(const Packet16f& x)
176 {
177 return predux_all(_mm512_castps_si512(x));
178 }
179
180 template<>
181 EIGEN_STRONG_INLINE Packet16i pcmplt<Packet16i>(const Packet16i& a, const Packet16i& b)
182 {
183 __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
184 return _mm512_movm_epi32(mask);
185 }
186
187 template<>
188 EIGEN_STRONG_INLINE Packet16f pcmplt<Packet16f>(const Packet16f& a, const Packet16f& b)
189 {
190 return pcmp_lt(a, b);
191 }
192
193 template<>
194 EIGEN_STRONG_INLINE Packet16f pcmple<Packet16f>(const Packet16f& a, const Packet16f& b)
195 {
196 return pcmp_le(a, b);
197 }
198
199 template<>
200 EIGEN_STRONG_INLINE Packet8d pcmplt<Packet8d>(const Packet8d& a, const Packet8d& b)
201 {
202 return pcmp_lt(a, b);
203 }
204 template<>
205 EIGEN_STRONG_INLINE Packet8d pcmple<Packet8d>(const Packet8d& a, const Packet8d& b)
206 {
207 return pcmp_le(a, b);
208 }
209
210 template<>
211 EIGEN_STRONG_INLINE Packet16f pblendv(const Packet16i& ifPacket, const Packet16f& thenPacket, const Packet16f& elsePacket)
212 {
213 __mmask16 mask = _mm512_movepi32_mask(ifPacket);
214 return _mm512_mask_blend_ps(mask, elsePacket, thenPacket);
215 }
216
217 template<>
218 EIGEN_STRONG_INLINE Packet16f pblendv(const Packet16f& ifPacket, const Packet16f& thenPacket, const Packet16f& elsePacket)
219 {
220 return pblendv(_mm512_castps_si512(ifPacket), thenPacket, elsePacket);
221 }
222
223 template<>
224 EIGEN_STRONG_INLINE Packet16i pblendv(const Packet16i& ifPacket, const Packet16i& thenPacket, const Packet16i& elsePacket)
225 {
226 __mmask16 mask = _mm512_movepi32_mask(ifPacket);
227 return _mm512_mask_blend_epi32(mask, elsePacket, thenPacket);
228 }
229
230 template<>
231 EIGEN_STRONG_INLINE Packet8d pblendv(const Packet16i& ifPacket, const Packet8d& thenPacket, const Packet8d& elsePacket)
232 {
233 __mmask8 mask = _mm512_movepi64_mask(ifPacket);
234 return _mm512_mask_blend_pd(mask, elsePacket, thenPacket);
235 }
236
237 template<>
238 EIGEN_STRONG_INLINE Packet8d pblendv(const Packet8d& ifPacket, const Packet8d& thenPacket, const Packet8d& elsePacket)
239 {
240 return pblendv(_mm512_castpd_si512(ifPacket), thenPacket, elsePacket);
241 }
242
243 template<>
244 EIGEN_STRONG_INLINE Packet16i pgather<Packet16i>(const int* addr, const Packet16i& index)
245 {
246 return _mm512_i32gather_epi32(index, addr, 4);
247 }
248
249 template<>
250 EIGEN_STRONG_INLINE Packet16f pgather<Packet16i>(const float* addr, const Packet16i& index)
251 {
252 return _mm512_i32gather_ps(index, addr, 4);
253 }
254
255 template<>
256 EIGEN_STRONG_INLINE Packet8d pgather<Packet16i>(const double* addr, const Packet16i& index, bool upperhalf)
257 {
258 return _mm512_i32gather_pd(_mm512_castsi512_si256(index), addr, 8);
259 }
260
261 template<>
262 EIGEN_STRONG_INLINE Packet16f ptruncate<Packet16f>(const Packet16f& a)
263 {
264 return _mm512_roundscale_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
265 }
266
267 template<>
268 EIGEN_STRONG_INLINE Packet8d ptruncate<Packet8d>(const Packet8d& a)
269 {
270 return _mm512_roundscale_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
271 }
272
273 template<>
274 EIGEN_STRONG_INLINE Packet16i pcmpeq64<Packet16i>(const Packet16i& a, const Packet16i& b)
275 {
276 __mmask8 mask = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_EQ);
277 return _mm512_movm_epi64(mask);
278 }
279
280 EIGEN_STRONG_INLINE __m512d int64_to_double_avx512(__m512i x) {
281 x = padd64(x, _mm512_castpd_si512(_mm512_set1_pd(0x0018000000000000)));
282 return _mm512_sub_pd(_mm512_castsi512_pd(x), _mm512_set1_pd(0x0018000000000000));
283 }
284
285 EIGEN_STRONG_INLINE __m512i double_to_int64_avx512(__m512d x) {
286 x = _mm512_add_pd(_mm512_floor_pd(x), _mm512_set1_pd(0x0018000000000000));
287 return psub64(
288 _mm512_castpd_si512(x),
289 _mm512_castpd_si512(_mm512_set1_pd(0x0018000000000000))
290 );
291 }
292 template<>
293 EIGEN_STRONG_INLINE Packet16i pcast64<Packet8d, Packet16i>(const Packet8d& a)
294 {
295 return double_to_int64_avx512(a);
296 }
297
298 template<>
299 EIGEN_STRONG_INLINE Packet8d pcast64<Packet16i, Packet8d>(const Packet16i& a)
300 {
301 return int64_to_double_avx512(a);
302 }
303
304 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
305 Packet8d psin<Packet8d>(const Packet8d& x)
306 {
307 return _psin(x);
308 }
309 }
310}
311
312#endif