12#ifndef EIGENRAND_MORE_PACKET_MATH_H
13#define EIGENRAND_MORE_PACKET_MATH_H
17#define EIGENRAND_PRINT_PACKET(p) do { using _MTy = typename std::remove_const<typename std::remove_reference<decltype(p)>::type>::type; typename std::conditional<Eigen::internal::IsFloatPacket<_MTy>::value, float, typename std::conditional<Eigen::internal::IsDoublePacket<_MTy>::value, double, int>::type>::type f[4]; Eigen::internal::pstore(f, p); std::cout << #p " " << f[0] << " " << f[1] << " " << f[2] << " " << f[3] << std::endl; } while(0)
24 struct IsIntPacket : std::false_type {};
27 struct IsFloatPacket : std::false_type {};
30 struct IsDoublePacket : std::false_type {};
35 template<
typename Packet>
36 struct reinterpreter{};
38 template<
typename Packet>
39 inline auto reinterpret_to_float(
const Packet& x)
40 ->
decltype(reinterpreter<Packet>{}.to_float(x))
42 return reinterpreter<Packet>{}.to_float(x);
45 template<
typename Packet>
46 inline auto reinterpret_to_double(
const Packet& x)
47 ->
decltype(reinterpreter<Packet>{}.to_double(x))
49 return reinterpreter<Packet>{}.to_double(x);
52 template<
typename Packet>
53 inline auto reinterpret_to_int(
const Packet& x)
54 ->
decltype(reinterpreter<Packet>{}.to_int(x))
56 return reinterpreter<Packet>{}.to_int(x);
59 template<
typename Packet>
60 EIGEN_STRONG_INLINE
void split_two(
const Packet& p,
typename HalfPacket<Packet>::type& a,
typename HalfPacket<Packet>::type& b);
62 template<
typename Packet>
63 EIGEN_STRONG_INLINE Packet pseti64(uint64_t a);
65 template<
typename Packet>
66 EIGEN_STRONG_INLINE Packet padd64(
const Packet& a,
const Packet& b);
68 template<
typename Packet>
69 EIGEN_STRONG_INLINE Packet psub64(
const Packet& a,
const Packet& b);
71 template <
typename SrcPacket,
typename TgtPacket>
72 EIGEN_STRONG_INLINE TgtPacket pcast64(
const SrcPacket& a);
74 template<
typename Packet>
75 EIGEN_STRONG_INLINE Packet pcmpeq(
const Packet& a,
const Packet& b);
77 template<
typename Packet>
80 template<
int b,
typename Packet>
81 EIGEN_STRONG_INLINE Packet psll(
const Packet& a);
83 template<
int _b,
typename Packet>
84 EIGEN_STRONG_INLINE Packet psrl(
const Packet& a,
int b = _b);
86 template<
int b,
typename Packet>
87 EIGEN_STRONG_INLINE Packet psll64(
const Packet& a);
89 template<
int b,
typename Packet>
90 EIGEN_STRONG_INLINE Packet psrl64(
const Packet& a);
92 template<
typename Packet>
93 EIGEN_STRONG_INLINE
bool predux_all(
const Packet& a);
95 template<
typename Packet>
96 EIGEN_STRONG_INLINE
bool predux_any(
const Packet& a);
110 template<
typename Packet>
111 EIGEN_STRONG_INLINE
int pmovemask(
const Packet& a);
113 template<
typename Packet>
114 EIGEN_STRONG_INLINE
typename std::enable_if<
115 IsFloatPacket<Packet>::value, Packet
116 >::type pext_sign(
const Packet& a)
118 using IntPacket =
decltype(reinterpret_to_int(a));
119 return reinterpret_to_float(
120 pand(reinterpret_to_int(a), pset1<IntPacket>(0x80000000))
124 template<
typename Packet>
125 EIGEN_STRONG_INLINE
typename std::enable_if<
126 IsDoublePacket<Packet>::value, Packet
127 >::type pext_sign(
const Packet& a)
129 using IntPacket =
decltype(reinterpret_to_int(a));
130 return reinterpret_to_double(
131 pand(reinterpret_to_int(a), pseti64<IntPacket>(0x8000000000000000))
148 template<
typename Packet>
149 EIGEN_STRONG_INLINE Packet plgamma_approx(
const Packet& x)
151 auto x_3 = padd(x, pset1<Packet>(3));
152 auto ret = pmul(padd(x_3, pset1<Packet>(-0.5)), plog(x_3));
153 ret = psub(ret, x_3);
154 ret = padd(ret, pset1<Packet>(0.9189385332046727));
155 ret = padd(ret, pdiv(pset1<Packet>(1 / 12.), x_3));
156 ret = psub(ret, plog(pmul(
157 pmul(psub(x_3, pset1<Packet>(1)), psub(x_3, pset1<Packet>(2))), x)));
161 template<
typename Packet>
162 EIGEN_STRONG_INLINE Packet pcmplt(
const Packet& a,
const Packet& b);
164 template<
typename Packet>
165 EIGEN_STRONG_INLINE Packet pcmple(
const Packet& a,
const Packet& b);
167 template<
typename Packet>
168 EIGEN_STRONG_INLINE Packet pbitnot(
const Packet& a);
170 template<
typename PacketIf,
typename Packet>
171 EIGEN_STRONG_INLINE Packet pblendv(
const PacketIf& ifPacket,
const Packet& thenPacket,
const Packet& elsePacket);
173 template<
typename Packet>
174 EIGEN_STRONG_INLINE Packet pgather(
const int* addr,
const Packet& index);
176 template<
typename Packet>
177 EIGEN_STRONG_INLINE
auto pgather(
const float* addr,
const Packet& index) ->
decltype(reinterpret_to_float(std::declval<Packet>()));
179 template<
typename Packet>
180 EIGEN_STRONG_INLINE
auto pgather(
const double* addr,
const Packet& index,
bool upperhalf =
false) ->
decltype(reinterpret_to_double(std::declval<Packet>()));
182 template<
typename Packet>
183 EIGEN_STRONG_INLINE Packet ptruncate(
const Packet& a);
185 template<
typename Packet>
186 EIGEN_STRONG_INLINE Packet pcmpeq64(
const Packet& a,
const Packet& b);
188 template<
typename Packet>
189 EIGEN_STRONG_INLINE Packet pcmplt64(
const Packet& a,
const Packet& b);
191 template<
typename Packet>
192 EIGEN_STRONG_INLINE Packet pmuluadd64(
const Packet& a, uint64_t b, uint64_t c);
194 template<
typename IntPacket>
195 EIGEN_STRONG_INLINE
auto bit_to_ur_float(
const IntPacket& x) ->
decltype(reinterpret_to_float(x))
197 using FloatPacket =
decltype(reinterpret_to_float(x));
199 const IntPacket lower = pset1<IntPacket>(0x7FFFFF),
200 upper = pset1<IntPacket>(127 << 23);
201 const FloatPacket one = pset1<FloatPacket>(1);
203 return psub(reinterpret_to_float(por(pand(x, lower), upper)), one);
206 template<
typename IntPacket>
207 EIGEN_STRONG_INLINE
auto bit_to_ur_double(
const IntPacket& x) ->
decltype(reinterpret_to_double(x))
209 using DoublePacket =
decltype(reinterpret_to_double(x));
211 const IntPacket lower = pseti64<IntPacket>(0xFFFFFFFFFFFFFull),
212 upper = pseti64<IntPacket>(1023ull << 52);
213 const DoublePacket one = pset1<DoublePacket>(1);
215 return psub(reinterpret_to_double(por(pand(x, lower), upper)), one);
218 template<
typename Packet>
219 EIGEN_STRONG_INLINE Packet pnew_andnot(
const Packet& a,
const Packet& b)
221#if defined(EIGEN_VECTORIZE_NEON) || defined(EIGENRAND_EIGEN_34_MODE)
222 return pandnot(a, b);
224 return pandnot(b, a);
228 template<
typename _Scalar>
232 struct BitScalar<float>
234 float to_ur(uint32_t x)
241 u = (x & 0x7FFFFF) | (127 << 23);
245 float to_nzur(uint32_t x)
247 return to_ur(x) + std::numeric_limits<float>::epsilon() / 8;
252 struct BitScalar<double>
254 double to_ur(uint64_t x)
261 u = (x & 0xFFFFFFFFFFFFFull) | (1023ull << 52);
265 double to_nzur(uint64_t x)
267 return to_ur(x) + std::numeric_limits<double>::epsilon() / 8;
277 EIGEN_STRONG_INLINE float2 bit_to_ur_float(uint64_t x)
281 ret.f[0] = bs.to_ur(x & 0xFFFFFFFF);
282 ret.f[1] = bs.to_ur(x >> 32);
286 template<
typename Packet>
287 EIGEN_STRONG_INLINE
typename std::enable_if<
288 IsFloatPacket<Packet>::value
289 >::type psincos(Packet x, Packet& s, Packet& c)
291 Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
292 using IntPacket =
decltype(reinterpret_to_int(x));
293 IntPacket emm0, emm2, emm4;
299 sign_bit_sin = pext_sign(sign_bit_sin);
302 y = pmul(x, pset1<Packet>(1.27323954473516));
305 emm2 = pcast<Packet, IntPacket>(y);
308 emm2 = padd(emm2, pset1<IntPacket>(1));
309 emm2 = pand(emm2, pset1<IntPacket>(~1));
310 y = pcast<IntPacket, Packet>(emm2);
315 emm0 = pand(emm2, pset1<IntPacket>(4));
316 emm0 = psll<29>(emm0);
317 Packet swap_sign_bit_sin = reinterpret_to_float(emm0);
320 emm2 = pand(emm2, pset1<IntPacket>(2));
322 emm2 = pcmpeq(emm2, pset1<IntPacket>(0));
323 Packet poly_mask = reinterpret_to_float(emm2);
327 xmm1 = pset1<Packet>(-0.78515625);
328 xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
329 xmm3 = pset1<Packet>(-3.77489497744594108e-8);
330 xmm1 = pmul(y, xmm1);
331 xmm2 = pmul(y, xmm2);
332 xmm3 = pmul(y, xmm3);
337 emm4 = psub(emm4, pset1<IntPacket>(2));
338 emm4 = pnew_andnot(pset1<IntPacket>(4), emm4);
339 emm4 = psll<29>(emm4);
340 Packet sign_bit_cos = reinterpret_to_float(emm4);
341 sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
345 Packet z = pmul(x, x);
346 y = pset1<Packet>(2.443315711809948E-005);
349 y = padd(y, pset1<Packet>(-1.388731625493765E-003));
351 y = padd(y, pset1<Packet>(4.166664568298827E-002));
354 Packet tmp = pmul(z, pset1<Packet>(0.5));
356 y = padd(y, pset1<Packet>(1));
360 Packet y2 = pset1<Packet>(-1.9515295891E-4);
362 y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
364 y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
371 Packet ysin2 = pand(xmm3, y2);
372 Packet ysin1 = pnew_andnot(y, xmm3);
373 y2 = psub(y2, ysin2);
376 xmm1 = padd(ysin1, ysin2);
380 s = pxor(xmm1, sign_bit_sin);
381 c = pxor(xmm2, sign_bit_cos);
384 template<
typename Packet>
385 EIGEN_STRONG_INLINE
typename std::enable_if<
386 IsDoublePacket<Packet>::value
387 >::type psincos(Packet x, Packet& s, Packet& c)
389 Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
390 using IntPacket =
decltype(reinterpret_to_int(x));
391 IntPacket emm0, emm2, emm4;
397 sign_bit_sin = pext_sign(sign_bit_sin);
400 y = pmul(x, pset1<Packet>(1.27323954473516));
403 emm2 = pcast64<Packet, IntPacket>(y);
406 emm2 = padd64(emm2, pseti64<IntPacket>(1));
407 emm2 = pand(emm2, pseti64<IntPacket>(~1ll));
408 y = pcast64<IntPacket, Packet>(emm2);
413 emm0 = pand(emm2, pseti64<IntPacket>(4));
414 emm0 = psll64<61>(emm0);
415 Packet swap_sign_bit_sin = reinterpret_to_double(emm0);
418 emm2 = pand(emm2, pseti64<IntPacket>(2));
420 emm2 = pcmpeq64(emm2, pseti64<IntPacket>(0));
421 Packet poly_mask = reinterpret_to_double(emm2);
425 xmm1 = pset1<Packet>(-0.78515625);
426 xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
427 xmm3 = pset1<Packet>(-3.77489497744594108e-8);
428 xmm1 = pmul(y, xmm1);
429 xmm2 = pmul(y, xmm2);
430 xmm3 = pmul(y, xmm3);
435 emm4 = psub64(emm4, pseti64<IntPacket>(2));
436 emm4 = pnew_andnot(pseti64<IntPacket>(4), emm4);
437 emm4 = psll64<61>(emm4);
438 Packet sign_bit_cos = reinterpret_to_double(emm4);
439 sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
443 Packet z = pmul(x, x);
444 y = pset1<Packet>(2.443315711809948E-005);
447 y = padd(y, pset1<Packet>(-1.388731625493765E-003));
449 y = padd(y, pset1<Packet>(4.166664568298827E-002));
452 Packet tmp = pmul(z, pset1<Packet>(0.5));
454 y = padd(y, pset1<Packet>(1));
458 Packet y2 = pset1<Packet>(-1.9515295891E-4);
460 y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
462 y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
469 Packet ysin2 = pand(xmm3, y2);
470 Packet ysin1 = pnew_andnot(y, xmm3);
471 y2 = psub(y2, ysin2);
474 xmm1 = padd(ysin1, ysin2);
478 s = pxor(xmm1, sign_bit_sin);
479 c = pxor(xmm2, sign_bit_cos);
482 template<
typename Packet>
483 EIGEN_STRONG_INLINE
typename std::enable_if<
484 IsDoublePacket<Packet>::value, Packet
485 >::type _psin(Packet x)
487 Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
488 using IntPacket =
decltype(reinterpret_to_int(x));
489 IntPacket emm0, emm2;
495 sign_bit_sin = pext_sign(sign_bit_sin);
498 y = pmul(x, pset1<Packet>(1.27323954473516));
501 emm2 = pcast64<Packet, IntPacket>(y);
504 emm2 = padd64(emm2, pseti64<IntPacket>(1));
505 emm2 = pand(emm2, pseti64<IntPacket>(~1ll));
506 y = pcast64<IntPacket, Packet>(emm2);
509 emm0 = pand(emm2, pseti64<IntPacket>(4));
510 emm0 = psll64<61>(emm0);
511 Packet swap_sign_bit_sin = reinterpret_to_double(emm0);
514 emm2 = pand(emm2, pseti64<IntPacket>(2));
516 emm2 = pcmpeq64(emm2, pseti64<IntPacket>(0));
517 Packet poly_mask = reinterpret_to_double(emm2);
521 xmm1 = pset1<Packet>(-0.78515625);
522 xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
523 xmm3 = pset1<Packet>(-3.77489497744594108e-8);
524 xmm1 = pmul(y, xmm1);
525 xmm2 = pmul(y, xmm2);
526 xmm3 = pmul(y, xmm3);
531 sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
535 Packet z = pmul(x, x);
536 y = pset1<Packet>(2.443315711809948E-005);
539 y = padd(y, pset1<Packet>(-1.388731625493765E-003));
541 y = padd(y, pset1<Packet>(4.166664568298827E-002));
544 Packet tmp = pmul(z, pset1<Packet>(0.5));
546 y = padd(y, pset1<Packet>(1));
550 Packet y2 = pset1<Packet>(-1.9515295891E-4);
552 y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
554 y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
561 Packet ysin2 = pand(xmm3, y2);
562 Packet ysin1 = pnew_andnot(y, xmm3);
564 xmm1 = padd(ysin1, ysin2);
567 return pxor(xmm1, sign_bit_sin);
572#ifdef EIGEN_VECTORIZE_AVX
576#ifdef EIGEN_VECTORIZE_SSE2
580#ifdef EIGEN_VECTORIZE_NEON
588 template<
int b,
typename Packet>
589 EIGEN_STRONG_INLINE Packet psll(
const Packet& a)
591 return BitShifter<Packet>{}.template sll<b>(a);
594 template<
int _b,
typename Packet>
595 EIGEN_STRONG_INLINE Packet psrl(
const Packet& a,
int b)
597 return BitShifter<Packet>{}.template srl<_b>(a, b);
600 template<
int b,
typename Packet>
601 EIGEN_STRONG_INLINE Packet psll64(
const Packet& a)
603 return BitShifter<Packet>{}.template sll64<b>(a);
606 template<
int b,
typename Packet>
607 EIGEN_STRONG_INLINE Packet psrl64(
const Packet& a)
609 return BitShifter<Packet>{}.template srl64<b>(a);