12#ifndef EIGENRAND_MORE_PACKET_MATH_AVX512_H
13#define EIGENRAND_MORE_PACKET_MATH_AVX512_H
22 struct IsIntPacket<Packet16i> : std::true_type {};
25 struct HalfPacket<Packet16i>
27 using type = Packet8i;
31 struct HalfPacket<Packet16f>
33 using type = Packet8f;
37 struct IsFloatPacket<Packet16f> : std::true_type {};
40 struct IsDoublePacket<Packet8d> : std::true_type {};
// reinterpreter specialization for a Packet16i source.
// The visible members go through the _mm512_cast* intrinsics, which change
// only the static type of the 512-bit register; the bit pattern is untouched.
43 struct reinterpreter<Packet16i>
// Views the 512 bits of x as a Packet16f.
45 EIGEN_STRONG_INLINE Packet16f to_float(
const Packet16i& x)
47 return _mm512_castsi512_ps(x);
// Views the 512 bits of x as a Packet8d.
50 EIGEN_STRONG_INLINE Packet8d to_double(
const Packet16i& x)
52 return _mm512_castsi512_pd(x);
// Source and target types coincide here.
// NOTE(review): the body of to_int is not visible in this excerpt;
// presumably it returns x unchanged - confirm against the full file.
55 EIGEN_STRONG_INLINE Packet16i to_int(
const Packet16i& x)
// reinterpreter specialization for a Packet16f source.
// The visible members go through the _mm512_cast* intrinsics, which change
// only the static type of the 512-bit register; the bit pattern is untouched.
62 struct reinterpreter<Packet16f>
// Source and target types coincide here.
// NOTE(review): the body of to_float is not visible in this excerpt;
// presumably it returns x unchanged - confirm against the full file.
64 EIGEN_STRONG_INLINE Packet16f to_float(
const Packet16f& x)
// Views the 512 bits of x as a Packet8d.
69 EIGEN_STRONG_INLINE Packet8d to_double(
const Packet16f& x)
71 return _mm512_castps_pd(x);
// Views the 512 bits of x as a Packet16i.
74 EIGEN_STRONG_INLINE Packet16i to_int(
const Packet16f& x)
76 return _mm512_castps_si512(x);
// reinterpreter specialization for a Packet8d source.
// The visible members go through the _mm512_cast* intrinsics, which change
// only the static type of the 512-bit register; the bit pattern is untouched.
81 struct reinterpreter<Packet8d>
// Views the 512 bits of x as a Packet16f.
83 EIGEN_STRONG_INLINE Packet16f to_float(
const Packet8d& x)
85 return _mm512_castpd_ps(x);
// Source and target types coincide here.
// NOTE(review): the body of to_double is not visible in this excerpt;
// presumably it returns x unchanged - confirm against the full file.
88 EIGEN_STRONG_INLINE Packet8d to_double(
const Packet8d& x)
// Views the 512 bits of x as a Packet16i.
93 EIGEN_STRONG_INLINE Packet16i to_int(
const Packet8d& x)
95 return _mm512_castpd_si512(x);
100 EIGEN_STRONG_INLINE Packet16i pseti64<Packet16i>(uint64_t a)
102 return _mm512_set1_epi64(a);
106 EIGEN_STRONG_INLINE Packet16i padd64<Packet16i>(
const Packet16i& a,
const Packet16i& b)
108 return _mm512_add_epi64(a, b);
112 EIGEN_STRONG_INLINE Packet16i psub64<Packet16i>(
const Packet16i& a,
const Packet16i& b)
114 return _mm512_sub_epi64(a, b);
118 EIGEN_STRONG_INLINE Packet16i pcmpeq<Packet16i>(
const Packet16i& a,
const Packet16i& b)
120 return pcmp_eq(a, b);
124 EIGEN_STRONG_INLINE Packet16f pcmpeq<Packet16f>(
const Packet16f& a,
const Packet16f& b)
126 return pcmp_eq(a, b);
130 EIGEN_STRONG_INLINE Packet16i pnegate<Packet16i>(
const Packet16i& a)
132 return _mm512_sub_epi32(pset1<Packet16i>(0), a);
// BitShifter specialization for Packet16i: logical shifts on 32-bit and
// 64-bit lanes. The shift count `b` used below is not declared in the visible
// lines.
// NOTE(review): presumably each member is a template taking `int b`; the
// template headers are missing from this excerpt - confirm.
136 struct BitShifter<Packet16i>
// Shifts every 32-bit lane left by b bits, shifting in zeros.
139 EIGEN_STRONG_INLINE Packet16i sll(
const Packet16i& a)
141 return _mm512_slli_epi32(a, b);
// Shifts every 32-bit lane right by b or by _b bits, shifting in zeros.
// _b defaults to b.
// NOTE(review): two return statements are visible but the condition choosing
// between them is not; it looks like a compile-time count b versus a run-time
// count _b - confirm against the full file.
145 EIGEN_STRONG_INLINE Packet16i srl(
const Packet16i& a,
int _b = b)
149 return _mm512_srli_epi32(a, b);
153 return _mm512_srli_epi32(a, _b);
// Shifts every 64-bit lane left by b bits, shifting in zeros.
158 EIGEN_STRONG_INLINE Packet16i sll64(
const Packet16i& a)
160 return _mm512_slli_epi64(a, b);
// Shifts every 64-bit lane right by b bits, shifting in zeros.
164 EIGEN_STRONG_INLINE Packet16i srl64(
const Packet16i& a)
166 return _mm512_srli_epi64(a, b);
170 template<> EIGEN_STRONG_INLINE
bool predux_all(
const Packet16i& x)
172 return _mm512_movepi32_mask(x) == 0xFFFF;
175 template<> EIGEN_STRONG_INLINE
bool predux_all(
const Packet16f& x)
177 return predux_all(_mm512_castps_si512(x));
181 EIGEN_STRONG_INLINE Packet16i pcmplt<Packet16i>(
const Packet16i& a,
const Packet16i& b)
183 __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
184 return _mm512_movm_epi32(mask);
188 EIGEN_STRONG_INLINE Packet16f pcmplt<Packet16f>(
const Packet16f& a,
const Packet16f& b)
190 return pcmp_lt(a, b);
194 EIGEN_STRONG_INLINE Packet16f pcmple<Packet16f>(
const Packet16f& a,
const Packet16f& b)
196 return pcmp_le(a, b);
200 EIGEN_STRONG_INLINE Packet8d pcmplt<Packet8d>(
const Packet8d& a,
const Packet8d& b)
202 return pcmp_lt(a, b);
205 EIGEN_STRONG_INLINE Packet8d pcmple<Packet8d>(
const Packet8d& a,
const Packet8d& b)
207 return pcmp_le(a, b);
// Per-lane select between two float packets, driven by an integer packet.
// For each of the sixteen 32-bit lanes, the most significant bit of ifPacket
// decides the source: set -> thenPacket, clear -> elsePacket.
// NOTE(review): the line preceding this signature (possibly a template
// header) is not visible in this excerpt.
211 EIGEN_STRONG_INLINE Packet16f pblendv(
const Packet16i& ifPacket,
const Packet16f& thenPacket,
const Packet16f& elsePacket)
// Collect one bit per lane (the lane's top bit) into a 16-bit mask.
213 __mmask16 mask = _mm512_movepi32_mask(ifPacket);
// The blend intrinsic takes the "mask bit clear" operand first, hence
// elsePacket before thenPacket.
214 return _mm512_mask_blend_ps(mask, elsePacket, thenPacket);
// Per-lane select between two float packets where the condition is itself a
// float packet: its bits are reinterpreted as integers and the call is
// forwarded to the pblendv overload that takes an integer condition.
// NOTE(review): the line preceding this signature (possibly a template
// header) is not visible in this excerpt.
218 EIGEN_STRONG_INLINE Packet16f pblendv(
const Packet16f& ifPacket,
const Packet16f& thenPacket,
const Packet16f& elsePacket)
220 return pblendv(_mm512_castps_si512(ifPacket), thenPacket, elsePacket);
// Per-lane select between two 32-bit integer packets.
// For each of the sixteen 32-bit lanes, the most significant bit of ifPacket
// decides the source: set -> thenPacket, clear -> elsePacket.
// NOTE(review): the line preceding this signature (possibly a template
// header) is not visible in this excerpt.
224 EIGEN_STRONG_INLINE Packet16i pblendv(
const Packet16i& ifPacket,
const Packet16i& thenPacket,
const Packet16i& elsePacket)
// Collect one bit per lane (the lane's top bit) into a 16-bit mask.
226 __mmask16 mask = _mm512_movepi32_mask(ifPacket);
// The blend intrinsic takes the "mask bit clear" operand first, hence
// elsePacket before thenPacket.
227 return _mm512_mask_blend_epi32(mask, elsePacket, thenPacket);
// Per-lane select between two double packets, driven by an integer packet.
// ifPacket is examined as eight 64-bit lanes: the most significant bit of
// each 64-bit lane decides the source: set -> thenPacket, clear -> elsePacket.
// NOTE(review): the line preceding this signature (possibly a template
// header) is not visible in this excerpt.
231 EIGEN_STRONG_INLINE Packet8d pblendv(
const Packet16i& ifPacket,
const Packet8d& thenPacket,
const Packet8d& elsePacket)
// Collect one bit per 64-bit lane (the lane's top bit) into an 8-bit mask.
233 __mmask8 mask = _mm512_movepi64_mask(ifPacket);
// The blend intrinsic takes the "mask bit clear" operand first, hence
// elsePacket before thenPacket.
234 return _mm512_mask_blend_pd(mask, elsePacket, thenPacket);
// Per-lane select between two double packets where the condition is itself a
// double packet: its bits are reinterpreted as integers and the call is
// forwarded to the pblendv overload that takes an integer condition.
// NOTE(review): the line preceding this signature (possibly a template
// header) is not visible in this excerpt.
238 EIGEN_STRONG_INLINE Packet8d pblendv(
const Packet8d& ifPacket,
const Packet8d& thenPacket,
const Packet8d& elsePacket)
240 return pblendv(_mm512_castpd_si512(ifPacket), thenPacket, elsePacket);
244 EIGEN_STRONG_INLINE Packet16i pgather<Packet16i>(
const int* addr,
const Packet16i& index)
246 return _mm512_i32gather_epi32(index, addr, 4);
250 EIGEN_STRONG_INLINE Packet16f pgather<Packet16i>(
const float* addr,
const Packet16i& index)
252 return _mm512_i32gather_ps(index, addr, 4);
256 EIGEN_STRONG_INLINE Packet8d pgather<Packet16i>(
const double* addr,
const Packet16i& index,
bool upperhalf)
258 return _mm512_i32gather_pd(_mm512_castsi512_si256(index), addr, 8);
262 EIGEN_STRONG_INLINE Packet16f ptruncate<Packet16f>(
const Packet16f& a)
264 return _mm512_roundscale_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
268 EIGEN_STRONG_INLINE Packet8d ptruncate<Packet8d>(
const Packet8d& a)
270 return _mm512_roundscale_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
274 EIGEN_STRONG_INLINE Packet16i pcmpeq64<Packet16i>(
const Packet16i& a,
const Packet16i& b)
276 __mmask8 mask = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_EQ);
277 return _mm512_movm_epi64(mask);
280 EIGEN_STRONG_INLINE __m512d int64_to_double_avx512(__m512i x) {
281 x = padd64(x, _mm512_castpd_si512(_mm512_set1_pd(0x0018000000000000)));
282 return _mm512_sub_pd(_mm512_castsi512_pd(x), _mm512_set1_pd(0x0018000000000000));
// Converts eight doubles to 64-bit integers using a magic-constant trick.
// The integer literal 0x0018000000000000 (= 2^52 + 2^51) is converted to a
// double and broadcast; no conversion instruction is visible in this block.
285 EIGEN_STRONG_INLINE __m512i double_to_int64_avx512(__m512d x) {
// Round down to an integral value first, then add the constant.
286 x = _mm512_add_pd(_mm512_floor_pd(x), _mm512_set1_pd(0x0018000000000000));
// NOTE(review): the statement that opens this expression is not visible in
// this excerpt; presumably it returns the 64-bit integer difference of the
// two bit patterns below - confirm against the full file.
288 _mm512_castpd_si512(x),
289 _mm512_castpd_si512(_mm512_set1_pd(0x0018000000000000))
293 EIGEN_STRONG_INLINE Packet16i pcast64<Packet8d, Packet16i>(
const Packet8d& a)
295 return double_to_int64_avx512(a);
299 EIGEN_STRONG_INLINE Packet8d pcast64<Packet16i, Packet8d>(
const Packet16i& a)
301 return int64_to_double_avx512(a);
304 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
305 Packet8d psin<Packet8d>(
const Packet8d& x)