12#ifndef EIGENRAND_MORE_PACKET_MATH_H
13#define EIGENRAND_MORE_PACKET_MATH_H
18#define EIGENRAND_PRINT_PACKET(p) do { using _MTy = typename std::remove_const<typename std::remove_reference<decltype(p)>::type>::type; typename std::conditional<Eigen::internal::IsFloatPacket<_MTy>::value, float, typename std::conditional<Eigen::internal::IsDoublePacket<_MTy>::value, double, int>::type>::type f[4]; Eigen::internal::pstore(f, p); std::cout << #p " " << f[0] << " " << f[1] << " " << f[2] << " " << f[3] << std::endl; } while(0)
25 struct IsIntPacket : std::false_type {};
28 struct IsFloatPacket : std::false_type {};
31 struct IsDoublePacket : std::false_type {};
36 template<
typename Packet>
37 struct reinterpreter{};
39 template<
typename Packet>
40 inline auto reinterpret_to_float(
const Packet& x)
41 ->
decltype(reinterpreter<Packet>{}.to_float(x))
43 return reinterpreter<Packet>{}.to_float(x);
46 template<
typename Packet>
47 inline auto reinterpret_to_double(
const Packet& x)
48 ->
decltype(reinterpreter<Packet>{}.to_double(x))
50 return reinterpreter<Packet>{}.to_double(x);
53 template<
typename Packet>
54 inline auto reinterpret_to_int(
const Packet& x)
55 ->
decltype(reinterpreter<Packet>{}.to_int(x))
57 return reinterpreter<Packet>{}.to_int(x);
60 template<
typename Packet>
61 EIGEN_STRONG_INLINE
void split_two(
const Packet& p,
typename HalfPacket<Packet>::type& a,
typename HalfPacket<Packet>::type& b);
63 template<
typename Packet>
64 EIGEN_STRONG_INLINE Packet pseti64(uint64_t a);
66 template<
typename Packet>
67 EIGEN_STRONG_INLINE Packet padd64(
const Packet& a,
const Packet& b);
69 template<
typename Packet>
70 EIGEN_STRONG_INLINE Packet psub64(
const Packet& a,
const Packet& b);
72 template <
typename SrcPacket,
typename TgtPacket>
73 EIGEN_STRONG_INLINE TgtPacket pcast64(
const SrcPacket& a);
75 template<
typename Packet>
76 EIGEN_STRONG_INLINE Packet pcmpeq(
const Packet& a,
const Packet& b);
78 template<
typename Packet>
81 template<
int b,
typename Packet>
82 EIGEN_STRONG_INLINE Packet psll(
const Packet& a);
84 template<
int _b,
typename Packet>
85 EIGEN_STRONG_INLINE Packet psrl(
const Packet& a,
int b = _b);
87 template<
int b,
typename Packet>
88 EIGEN_STRONG_INLINE Packet psll64(
const Packet& a);
90 template<
int b,
typename Packet>
91 EIGEN_STRONG_INLINE Packet psrl64(
const Packet& a);
93 template<
typename Packet>
94 EIGEN_STRONG_INLINE
bool predux_all(
const Packet& a);
96 template<
typename Packet>
97 EIGEN_STRONG_INLINE
bool predux_any(
const Packet& a);
111 template<
typename Packet>
112 EIGEN_STRONG_INLINE
int pmovemask(
const Packet& a);
114 template<
typename Packet>
115 EIGEN_STRONG_INLINE
typename std::enable_if<
116 IsFloatPacket<Packet>::value, Packet
117 >::type pext_sign(
const Packet& a)
119 using IntPacket =
decltype(reinterpret_to_int(a));
120 return reinterpret_to_float(
121 pand(reinterpret_to_int(a), pset1<IntPacket>(0x80000000))
125 template<
typename Packet>
126 EIGEN_STRONG_INLINE
typename std::enable_if<
127 IsDoublePacket<Packet>::value, Packet
128 >::type pext_sign(
const Packet& a)
130 using IntPacket =
decltype(reinterpret_to_int(a));
131 return reinterpret_to_double(
132 pand(reinterpret_to_int(a), pseti64<IntPacket>(0x8000000000000000))
149 template<
typename Packet>
150 EIGEN_STRONG_INLINE Packet plgamma_approx(
const Packet& x)
152 auto x_3 = padd(x, pset1<Packet>(3));
153 auto ret = pmul(padd(x_3, pset1<Packet>(-0.5)), plog(x_3));
154 ret = psub(ret, x_3);
155 ret = padd(ret, pset1<Packet>(0.9189385332046727));
156 ret = padd(ret, pdiv(pset1<Packet>(1 / 12.), x_3));
157 ret = psub(ret, plog(pmul(
158 pmul(psub(x_3, pset1<Packet>(1)), psub(x_3, pset1<Packet>(2))), x)));
162 template<
typename Packet>
163 EIGEN_STRONG_INLINE Packet pcmplt(
const Packet& a,
const Packet& b);
165 template<
typename Packet>
166 EIGEN_STRONG_INLINE Packet pcmple(
const Packet& a,
const Packet& b);
168 template<
typename Packet>
169 EIGEN_STRONG_INLINE Packet pbitnot(
const Packet& a);
171 template<
typename PacketIf,
typename Packet>
172 EIGEN_STRONG_INLINE Packet pblendv(
const PacketIf& ifPacket,
const Packet& thenPacket,
const Packet& elsePacket);
174 template<
typename Packet>
175 EIGEN_STRONG_INLINE Packet pgather(
const int* addr,
const Packet& index);
177 template<
typename Packet>
178 EIGEN_STRONG_INLINE
auto pgather(
const float* addr,
const Packet& index) ->
decltype(reinterpret_to_float(std::declval<Packet>()));
180 template<
typename Packet>
181 EIGEN_STRONG_INLINE
auto pgather(
const double* addr,
const Packet& index,
bool upperhalf =
false) ->
decltype(reinterpret_to_double(std::declval<Packet>()));
183 template<
typename Packet>
184 EIGEN_STRONG_INLINE Packet ptruncate(
const Packet& a);
186 template<
typename Packet>
187 EIGEN_STRONG_INLINE Packet pcmpeq64(
const Packet& a,
const Packet& b);
189 template<
typename Packet>
190 EIGEN_STRONG_INLINE Packet pcmplt64(
const Packet& a,
const Packet& b);
192 template<
typename Packet>
193 EIGEN_STRONG_INLINE Packet pmuluadd64(
const Packet& a, uint64_t b, uint64_t c);
195 template<
typename IntPacket>
196 EIGEN_STRONG_INLINE
auto bit_to_ur_float(
const IntPacket& x) ->
decltype(reinterpret_to_float(x))
198 using FloatPacket =
decltype(reinterpret_to_float(x));
200 const IntPacket lower = pset1<IntPacket>(0x7FFFFF),
201 upper = pset1<IntPacket>(127 << 23);
202 const FloatPacket one = pset1<FloatPacket>(1);
204 return psub(reinterpret_to_float(por(pand(x, lower), upper)), one);
207 template<
typename IntPacket>
208 EIGEN_STRONG_INLINE
auto bit_to_ur_double(
const IntPacket& x) ->
decltype(reinterpret_to_double(x))
210 using DoublePacket =
decltype(reinterpret_to_double(x));
212 const IntPacket lower = pseti64<IntPacket>(0xFFFFFFFFFFFFFull),
213 upper = pseti64<IntPacket>(1023ull << 52);
214 const DoublePacket one = pset1<DoublePacket>(1);
216 return psub(reinterpret_to_double(por(pand(x, lower), upper)), one);
219 template<
typename Packet>
220 EIGEN_STRONG_INLINE Packet pnew_andnot(
const Packet& a,
const Packet& b)
222#if defined(EIGEN_VECTORIZE_NEON) || defined(EIGENRAND_EIGEN_34_MODE)
223 return pandnot(a, b);
225 return pandnot(b, a);
229 template<
typename _Scalar>
233 struct BitScalar<float>
235 float to_ur(uint32_t x)
242 u = (x & 0x7FFFFF) | (127 << 23);
246 float to_nzur(uint32_t x)
248 return to_ur(x) + std::numeric_limits<float>::epsilon() / 8;
253 struct BitScalar<double>
255 double to_ur(uint64_t x)
262 u = (x & 0xFFFFFFFFFFFFFull) | (1023ull << 52);
266 double to_nzur(uint64_t x)
268 return to_ur(x) + std::numeric_limits<double>::epsilon() / 8;
278 EIGEN_STRONG_INLINE float2 bit_to_ur_float(uint64_t x)
282 ret.f[0] = bs.to_ur(x & 0xFFFFFFFF);
283 ret.f[1] = bs.to_ur(x >> 32);
287 template<
typename Packet>
288 EIGEN_STRONG_INLINE
typename std::enable_if<
289 IsFloatPacket<Packet>::value
290 >::type psincos(Packet x, Packet& s, Packet& c)
292 Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
293 using IntPacket =
decltype(reinterpret_to_int(x));
294 IntPacket emm0, emm2, emm4;
300 sign_bit_sin = pext_sign(sign_bit_sin);
303 y = pmul(x, pset1<Packet>(1.27323954473516));
306 emm2 = pcast<Packet, IntPacket>(y);
309 emm2 = padd(emm2, pset1<IntPacket>(1));
310 emm2 = pand(emm2, pset1<IntPacket>(~1));
311 y = pcast<IntPacket, Packet>(emm2);
316 emm0 = pand(emm2, pset1<IntPacket>(4));
317 emm0 = psll<29>(emm0);
318 Packet swap_sign_bit_sin = reinterpret_to_float(emm0);
321 emm2 = pand(emm2, pset1<IntPacket>(2));
323 emm2 = pcmpeq(emm2, pset1<IntPacket>(0));
324 Packet poly_mask = reinterpret_to_float(emm2);
328 xmm1 = pset1<Packet>(-0.78515625);
329 xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
330 xmm3 = pset1<Packet>(-3.77489497744594108e-8);
331 xmm1 = pmul(y, xmm1);
332 xmm2 = pmul(y, xmm2);
333 xmm3 = pmul(y, xmm3);
338 emm4 = psub(emm4, pset1<IntPacket>(2));
339 emm4 = pnew_andnot(pset1<IntPacket>(4), emm4);
340 emm4 = psll<29>(emm4);
341 Packet sign_bit_cos = reinterpret_to_float(emm4);
342 sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
346 Packet z = pmul(x, x);
347 y = pset1<Packet>(2.443315711809948E-005);
350 y = padd(y, pset1<Packet>(-1.388731625493765E-003));
352 y = padd(y, pset1<Packet>(4.166664568298827E-002));
355 Packet tmp = pmul(z, pset1<Packet>(0.5));
357 y = padd(y, pset1<Packet>(1));
361 Packet y2 = pset1<Packet>(-1.9515295891E-4);
363 y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
365 y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
372 Packet ysin2 = pand(xmm3, y2);
373 Packet ysin1 = pnew_andnot(y, xmm3);
374 y2 = psub(y2, ysin2);
377 xmm1 = padd(ysin1, ysin2);
381 s = pxor(xmm1, sign_bit_sin);
382 c = pxor(xmm2, sign_bit_cos);
385 template<
typename Packet>
386 EIGEN_STRONG_INLINE
typename std::enable_if<
387 IsDoublePacket<Packet>::value
388 >::type psincos(Packet x, Packet& s, Packet& c)
390 Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
391 using IntPacket =
decltype(reinterpret_to_int(x));
392 IntPacket emm0, emm2, emm4;
398 sign_bit_sin = pext_sign(sign_bit_sin);
401 y = pmul(x, pset1<Packet>(1.27323954473516));
404 emm2 = pcast64<Packet, IntPacket>(y);
407 emm2 = padd64(emm2, pseti64<IntPacket>(1));
408 emm2 = pand(emm2, pseti64<IntPacket>(~1ll));
409 y = pcast64<IntPacket, Packet>(emm2);
414 emm0 = pand(emm2, pseti64<IntPacket>(4));
415 emm0 = psll64<61>(emm0);
416 Packet swap_sign_bit_sin = reinterpret_to_double(emm0);
419 emm2 = pand(emm2, pseti64<IntPacket>(2));
421 emm2 = pcmpeq64(emm2, pseti64<IntPacket>(0));
422 Packet poly_mask = reinterpret_to_double(emm2);
426 xmm1 = pset1<Packet>(-0.78515625);
427 xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
428 xmm3 = pset1<Packet>(-3.77489497744594108e-8);
429 xmm1 = pmul(y, xmm1);
430 xmm2 = pmul(y, xmm2);
431 xmm3 = pmul(y, xmm3);
436 emm4 = psub64(emm4, pseti64<IntPacket>(2));
437 emm4 = pnew_andnot(pseti64<IntPacket>(4), emm4);
438 emm4 = psll64<61>(emm4);
439 Packet sign_bit_cos = reinterpret_to_double(emm4);
440 sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
444 Packet z = pmul(x, x);
445 y = pset1<Packet>(2.443315711809948E-005);
448 y = padd(y, pset1<Packet>(-1.388731625493765E-003));
450 y = padd(y, pset1<Packet>(4.166664568298827E-002));
453 Packet tmp = pmul(z, pset1<Packet>(0.5));
455 y = padd(y, pset1<Packet>(1));
459 Packet y2 = pset1<Packet>(-1.9515295891E-4);
461 y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
463 y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
470 Packet ysin2 = pand(xmm3, y2);
471 Packet ysin1 = pnew_andnot(y, xmm3);
472 y2 = psub(y2, ysin2);
475 xmm1 = padd(ysin1, ysin2);
479 s = pxor(xmm1, sign_bit_sin);
480 c = pxor(xmm2, sign_bit_cos);
483 template<
typename Packet>
484 EIGEN_STRONG_INLINE
typename std::enable_if<
485 IsDoublePacket<Packet>::value, Packet
486 >::type _psin(Packet x)
488 Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
489 using IntPacket =
decltype(reinterpret_to_int(x));
490 IntPacket emm0, emm2;
496 sign_bit_sin = pext_sign(sign_bit_sin);
499 y = pmul(x, pset1<Packet>(1.27323954473516));
502 emm2 = pcast64<Packet, IntPacket>(y);
505 emm2 = padd64(emm2, pseti64<IntPacket>(1));
506 emm2 = pand(emm2, pseti64<IntPacket>(~1ll));
507 y = pcast64<IntPacket, Packet>(emm2);
510 emm0 = pand(emm2, pseti64<IntPacket>(4));
511 emm0 = psll64<61>(emm0);
512 Packet swap_sign_bit_sin = reinterpret_to_double(emm0);
515 emm2 = pand(emm2, pseti64<IntPacket>(2));
517 emm2 = pcmpeq64(emm2, pseti64<IntPacket>(0));
518 Packet poly_mask = reinterpret_to_double(emm2);
522 xmm1 = pset1<Packet>(-0.78515625);
523 xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
524 xmm3 = pset1<Packet>(-3.77489497744594108e-8);
525 xmm1 = pmul(y, xmm1);
526 xmm2 = pmul(y, xmm2);
527 xmm3 = pmul(y, xmm3);
532 sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
536 Packet z = pmul(x, x);
537 y = pset1<Packet>(2.443315711809948E-005);
540 y = padd(y, pset1<Packet>(-1.388731625493765E-003));
542 y = padd(y, pset1<Packet>(4.166664568298827E-002));
545 Packet tmp = pmul(z, pset1<Packet>(0.5));
547 y = padd(y, pset1<Packet>(1));
551 Packet y2 = pset1<Packet>(-1.9515295891E-4);
553 y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
555 y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
562 Packet ysin2 = pand(xmm3, y2);
563 Packet ysin1 = pnew_andnot(y, xmm3);
565 xmm1 = padd(ysin1, ysin2);
568 return pxor(xmm1, sign_bit_sin);
573#ifdef EIGEN_VECTORIZE_AVX512
577#ifdef EIGEN_VECTORIZE_AVX
581#ifdef EIGEN_VECTORIZE_SSE2
585#ifdef EIGEN_VECTORIZE_NEON
593 template<
int b,
typename Packet>
594 EIGEN_STRONG_INLINE Packet psll(
const Packet& a)
596 return BitShifter<Packet>{}.template sll<b>(a);
599 template<
int _b,
typename Packet>
600 EIGEN_STRONG_INLINE Packet psrl(
const Packet& a,
int b)
602 return BitShifter<Packet>{}.template srl<_b>(a, b);
605 template<
int b,
typename Packet>
606 EIGEN_STRONG_INLINE Packet psll64(
const Packet& a)
608 return BitShifter<Packet>{}.template sll64<b>(a);
611 template<
int b,
typename Packet>
612 EIGEN_STRONG_INLINE Packet psrl64(
const Packet& a)
614 return BitShifter<Packet>{}.template srl64<b>(a);