12 #ifndef EIGENRAND_MORE_PACKET_MATH_H
13 #define EIGENRAND_MORE_PACKET_MATH_H
15 #include <Eigen/Dense>
17 #define EIGENRAND_PRINT_PACKET(p) do { using _MTy = typename std::remove_const<typename std::remove_reference<decltype(p)>::type>::type; typename std::conditional<Eigen::internal::IsFloatPacket<_MTy>::value, float, typename std::conditional<Eigen::internal::IsDoublePacket<_MTy>::value, double, int>::type>::type f[4]; Eigen::internal::pstore(f, p); std::cout << #p " " << f[0] << " " << f[1] << " " << f[2] << " " << f[3] << std::endl; } while(0)
24 struct IsIntPacket : std::false_type {};
27 struct IsFloatPacket : std::false_type {};
30 struct IsDoublePacket : std::false_type {};
35 template<
typename Packet>
36 struct reinterpreter{};
38 template<
typename Packet>
39 inline auto reinterpret_to_float(
const Packet& x)
40 -> decltype(reinterpreter<Packet>{}.to_float(x))
42 return reinterpreter<Packet>{}.to_float(x);
45 template<
typename Packet>
46 inline auto reinterpret_to_double(
const Packet& x)
47 -> decltype(reinterpreter<Packet>{}.to_double(x))
49 return reinterpreter<Packet>{}.to_double(x);
52 template<
typename Packet>
53 inline auto reinterpret_to_int(
const Packet& x)
54 -> decltype(reinterpreter<Packet>{}.to_int(x))
56 return reinterpreter<Packet>{}.to_int(x);
59 template<
typename Packet>
60 EIGEN_STRONG_INLINE
void split_two(
const Packet& p,
typename HalfPacket<Packet>::type& a,
typename HalfPacket<Packet>::type& b);
62 template<
typename Packet>
63 EIGEN_STRONG_INLINE Packet pseti64(uint64_t a);
65 template<
typename Packet>
66 EIGEN_STRONG_INLINE Packet padd64(
const Packet& a,
const Packet& b);
68 template<
typename Packet>
69 EIGEN_STRONG_INLINE Packet psub64(
const Packet& a,
const Packet& b);
71 template <
typename SrcPacket,
typename TgtPacket>
72 EIGEN_STRONG_INLINE TgtPacket pcast64(
const SrcPacket& a);
74 template<
typename Packet>
75 EIGEN_STRONG_INLINE Packet pcmpeq(
const Packet& a,
const Packet& b);
77 template<
typename Packet>
80 template<
int b,
typename Packet>
81 EIGEN_STRONG_INLINE Packet psll(
const Packet& a);
83 template<
int _b,
typename Packet>
84 EIGEN_STRONG_INLINE Packet psrl(
const Packet& a,
int b = _b);
86 template<
int b,
typename Packet>
87 EIGEN_STRONG_INLINE Packet psll64(
const Packet& a);
89 template<
int b,
typename Packet>
90 EIGEN_STRONG_INLINE Packet psrl64(
const Packet& a);
104 template<
typename Packet>
105 EIGEN_STRONG_INLINE
int pmovemask(
const Packet& a);
107 template<
typename Packet>
108 EIGEN_STRONG_INLINE
typename std::enable_if<
109 IsFloatPacket<Packet>::value, Packet
110 >::type pext_sign(
const Packet& a)
112 using IntPacket = decltype(reinterpret_to_int(a));
113 return reinterpret_to_float(
114 pand(reinterpret_to_int(a), pset1<IntPacket>(0x80000000))
118 template<
typename Packet>
119 EIGEN_STRONG_INLINE
typename std::enable_if<
120 IsDoublePacket<Packet>::value, Packet
121 >::type pext_sign(
const Packet& a)
123 using IntPacket = decltype(reinterpret_to_int(a));
124 return reinterpret_to_double(
125 pand(reinterpret_to_int(a), pseti64<IntPacket>(0x8000000000000000))
142 template<
typename Packet>
143 EIGEN_STRONG_INLINE Packet plgamma_approx(
const Packet& x)
145 auto x_3 = padd(x, pset1<Packet>(3));
146 auto ret = pmul(padd(x_3, pset1<Packet>(-0.5)), plog(x_3));
147 ret = psub(ret, x_3);
148 ret = padd(ret, pset1<Packet>(0.9189385332046727));
149 ret = padd(ret, pdiv(pset1<Packet>(1 / 12.), x_3));
150 ret = psub(ret, plog(pmul(
151 pmul(psub(x_3, pset1<Packet>(1)), psub(x_3, pset1<Packet>(2))), x)));
155 template<
typename Packet>
156 EIGEN_STRONG_INLINE Packet pcmplt(
const Packet& a,
const Packet& b);
158 template<
typename Packet>
159 EIGEN_STRONG_INLINE Packet pcmple(
const Packet& a,
const Packet& b);
161 template<
typename Packet>
162 EIGEN_STRONG_INLINE Packet pbitnot(
const Packet& a);
164 template<
typename PacketIf,
typename Packet>
165 EIGEN_STRONG_INLINE Packet pblendv(
const PacketIf& ifPacket,
const Packet& thenPacket,
const Packet& elsePacket);
167 template<
typename Packet>
168 EIGEN_STRONG_INLINE Packet pgather(
const int* addr,
const Packet& index);
170 template<
typename Packet>
171 EIGEN_STRONG_INLINE
auto pgather(
const float* addr,
const Packet& index) -> decltype(reinterpret_to_float(std::declval<Packet>()));
173 template<
typename Packet>
174 EIGEN_STRONG_INLINE
auto pgather(
const double* addr,
const Packet& index,
bool upperhalf =
false) -> decltype(reinterpret_to_double(std::declval<Packet>()));
176 template<
typename Packet>
177 EIGEN_STRONG_INLINE Packet ptruncate(
const Packet& a);
179 template<
typename Packet>
180 EIGEN_STRONG_INLINE Packet pcmpeq64(
const Packet& a,
const Packet& b);
182 template<
typename Packet>
183 EIGEN_STRONG_INLINE Packet pcmplt64(
const Packet& a,
const Packet& b);
185 template<
typename Packet>
186 EIGEN_STRONG_INLINE Packet pmuluadd64(
const Packet& a, uint64_t b, uint64_t c);
188 template<
typename IntPacket>
189 EIGEN_STRONG_INLINE
auto bit_to_ur_float(
const IntPacket& x) -> decltype(reinterpret_to_float(x))
191 using FloatPacket = decltype(reinterpret_to_float(x));
193 const IntPacket lower = pset1<IntPacket>(0x7FFFFF),
194 upper = pset1<IntPacket>(127 << 23);
195 const FloatPacket one = pset1<FloatPacket>(1);
197 return psub(reinterpret_to_float(por(pand(x, lower), upper)), one);
200 template<
typename IntPacket>
201 EIGEN_STRONG_INLINE
auto bit_to_ur_double(
const IntPacket& x) -> decltype(reinterpret_to_double(x))
203 using DoublePacket = decltype(reinterpret_to_double(x));
205 const IntPacket lower = pseti64<IntPacket>(0xFFFFFFFFFFFFFull),
206 upper = pseti64<IntPacket>(1023ull << 52);
207 const DoublePacket one = pset1<DoublePacket>(1);
209 return psub(reinterpret_to_double(por(pand(x, lower), upper)), one);
212 template<
typename _Scalar>
216 struct BitScalar<float>
218 float to_ur(uint32_t x)
225 u = (x & 0x7FFFFF) | (127 << 23);
229 float to_nzur(uint32_t x)
231 return to_ur(x) + std::numeric_limits<float>::epsilon() / 8;
236 struct BitScalar<double>
238 double to_ur(uint64_t x)
245 u = (x & 0xFFFFFFFFFFFFFull) | (1023ull << 52);
249 double to_nzur(uint64_t x)
251 return to_ur(x) + std::numeric_limits<double>::epsilon() / 8;
261 EIGEN_STRONG_INLINE float2 bit_to_ur_float(uint64_t x)
265 ret.f[0] = bs.to_ur(x & 0xFFFFFFFF);
266 ret.f[1] = bs.to_ur(x >> 32);
270 template<
typename Packet>
271 EIGEN_STRONG_INLINE
typename std::enable_if<
272 IsFloatPacket<Packet>::value
273 >::type psincos(Packet x, Packet& s, Packet& c)
275 Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
276 using IntPacket = decltype(reinterpret_to_int(x));
277 IntPacket emm0, emm2, emm4;
283 sign_bit_sin = pext_sign(sign_bit_sin);
286 y = pmul(x, pset1<Packet>(1.27323954473516));
289 emm2 = pcast<Packet, IntPacket>(y);
292 emm2 = padd(emm2, pset1<IntPacket>(1));
293 emm2 = pand(emm2, pset1<IntPacket>(~1));
294 y = pcast<IntPacket, Packet>(emm2);
299 emm0 = pand(emm2, pset1<IntPacket>(4));
300 emm0 = psll<29>(emm0);
301 Packet swap_sign_bit_sin = reinterpret_to_float(emm0);
304 emm2 = pand(emm2, pset1<IntPacket>(2));
306 emm2 = pcmpeq(emm2, pset1<IntPacket>(0));
307 Packet poly_mask = reinterpret_to_float(emm2);
311 xmm1 = pset1<Packet>(-0.78515625);
312 xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
313 xmm3 = pset1<Packet>(-3.77489497744594108e-8);
314 xmm1 = pmul(y, xmm1);
315 xmm2 = pmul(y, xmm2);
316 xmm3 = pmul(y, xmm3);
321 emm4 = psub(emm4, pset1<IntPacket>(2));
322 #if defined(EIGEN_VECTORIZE_NEON) || defined(EIGENRAND_EIGEN_34_MODE)
323 emm4 = pandnot(pset1<IntPacket>(4), emm4);
325 emm4 = pandnot(emm4, pset1<IntPacket>(4));
327 emm4 = psll<29>(emm4);
328 Packet sign_bit_cos = reinterpret_to_float(emm4);
329 sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
333 Packet z = pmul(x, x);
334 y = pset1<Packet>(2.443315711809948E-005);
337 y = padd(y, pset1<Packet>(-1.388731625493765E-003));
339 y = padd(y, pset1<Packet>(4.166664568298827E-002));
342 Packet tmp = pmul(z, pset1<Packet>(0.5));
344 y = padd(y, pset1<Packet>(1));
348 Packet y2 = pset1<Packet>(-1.9515295891E-4);
350 y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
352 y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
359 Packet ysin2 = pand(xmm3, y2);
360 #if defined(EIGEN_VECTORIZE_NEON) || defined(EIGENRAND_EIGEN_34_MODE)
361 Packet ysin1 = pandnot(y, xmm3);
363 Packet ysin1 = pandnot(xmm3, y);
365 y2 = psub(y2, ysin2);
368 xmm1 = padd(ysin1, ysin2);
372 s = pxor(xmm1, sign_bit_sin);
373 c = pxor(xmm2, sign_bit_cos);
376 template<
typename Packet>
377 EIGEN_STRONG_INLINE
typename std::enable_if<
378 IsDoublePacket<Packet>::value
379 >::type psincos(Packet x, Packet& s, Packet& c)
381 Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
382 using IntPacket = decltype(reinterpret_to_int(x));
383 IntPacket emm0, emm2, emm4;
389 sign_bit_sin = pext_sign(sign_bit_sin);
392 y = pmul(x, pset1<Packet>(1.27323954473516));
395 emm2 = pcast64<Packet, IntPacket>(y);
398 emm2 = padd64(emm2, pseti64<IntPacket>(1));
399 emm2 = pand(emm2, pseti64<IntPacket>(~1ll));
400 y = pcast64<IntPacket, Packet>(emm2);
405 emm0 = pand(emm2, pseti64<IntPacket>(4));
406 emm0 = psll64<61>(emm0);
407 Packet swap_sign_bit_sin = reinterpret_to_double(emm0);
410 emm2 = pand(emm2, pseti64<IntPacket>(2));
412 emm2 = pcmpeq64(emm2, pseti64<IntPacket>(0));
413 Packet poly_mask = reinterpret_to_double(emm2);
417 xmm1 = pset1<Packet>(-0.78515625);
418 xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
419 xmm3 = pset1<Packet>(-3.77489497744594108e-8);
420 xmm1 = pmul(y, xmm1);
421 xmm2 = pmul(y, xmm2);
422 xmm3 = pmul(y, xmm3);
427 emm4 = psub64(emm4, pseti64<IntPacket>(2));
428 #if defined(EIGEN_VECTORIZE_NEON) || defined(EIGENRAND_EIGEN_34_MODE)
429 emm4 = pandnot(pseti64<IntPacket>(4), emm4);
431 emm4 = pandnot(emm4, pseti64<IntPacket>(4));
433 emm4 = psll64<61>(emm4);
434 Packet sign_bit_cos = reinterpret_to_double(emm4);
435 sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
439 Packet z = pmul(x, x);
440 y = pset1<Packet>(2.443315711809948E-005);
443 y = padd(y, pset1<Packet>(-1.388731625493765E-003));
445 y = padd(y, pset1<Packet>(4.166664568298827E-002));
448 Packet tmp = pmul(z, pset1<Packet>(0.5));
450 y = padd(y, pset1<Packet>(1));
454 Packet y2 = pset1<Packet>(-1.9515295891E-4);
456 y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
458 y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
465 Packet ysin2 = pand(xmm3, y2);
466 #if defined(EIGEN_VECTORIZE_NEON) || defined(EIGENRAND_EIGEN_34_MODE)
467 Packet ysin1 = pandnot(y, xmm3);
469 Packet ysin1 = pandnot(xmm3, y);
471 y2 = psub(y2, ysin2);
474 xmm1 = padd(ysin1, ysin2);
478 s = pxor(xmm1, sign_bit_sin);
479 c = pxor(xmm2, sign_bit_cos);
482 template<
typename Packet>
483 EIGEN_STRONG_INLINE
typename std::enable_if<
484 IsDoublePacket<Packet>::value, Packet
485 >::type _psin(Packet x)
487 Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
488 using IntPacket = decltype(reinterpret_to_int(x));
489 IntPacket emm0, emm2;
495 sign_bit_sin = pext_sign(sign_bit_sin);
498 y = pmul(x, pset1<Packet>(1.27323954473516));
501 emm2 = pcast64<Packet, IntPacket>(y);
504 emm2 = padd64(emm2, pseti64<IntPacket>(1));
505 emm2 = pand(emm2, pseti64<IntPacket>(~1ll));
506 y = pcast64<IntPacket, Packet>(emm2);
509 emm0 = pand(emm2, pseti64<IntPacket>(4));
510 emm0 = psll64<61>(emm0);
511 Packet swap_sign_bit_sin = reinterpret_to_double(emm0);
514 emm2 = pand(emm2, pseti64<IntPacket>(2));
516 emm2 = pcmpeq64(emm2, pseti64<IntPacket>(0));
517 Packet poly_mask = reinterpret_to_double(emm2);
521 xmm1 = pset1<Packet>(-0.78515625);
522 xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
523 xmm3 = pset1<Packet>(-3.77489497744594108e-8);
524 xmm1 = pmul(y, xmm1);
525 xmm2 = pmul(y, xmm2);
526 xmm3 = pmul(y, xmm3);
531 sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
535 Packet z = pmul(x, x);
536 y = pset1<Packet>(2.443315711809948E-005);
539 y = padd(y, pset1<Packet>(-1.388731625493765E-003));
541 y = padd(y, pset1<Packet>(4.166664568298827E-002));
544 Packet tmp = pmul(z, pset1<Packet>(0.5));
546 y = padd(y, pset1<Packet>(1));
550 Packet y2 = pset1<Packet>(-1.9515295891E-4);
552 y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
554 y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
561 Packet ysin2 = pand(xmm3, y2);
562 #if defined(EIGEN_VECTORIZE_NEON) || defined(EIGENRAND_EIGEN_34_MODE)
563 Packet ysin1 = pandnot(y, xmm3);
565 Packet ysin1 = pandnot(xmm3, y);
568 xmm1 = padd(ysin1, ysin2);
571 return pxor(xmm1, sign_bit_sin);
576 #ifdef EIGEN_VECTORIZE_AVX
580 #ifdef EIGEN_VECTORIZE_SSE2
584 #ifdef EIGEN_VECTORIZE_NEON
592 template<
int b,
typename Packet>
593 EIGEN_STRONG_INLINE Packet psll(
const Packet& a)
595 return BitShifter<Packet>{}.template sll<b>(a);
598 template<
int _b,
typename Packet>
599 EIGEN_STRONG_INLINE Packet psrl(
const Packet& a,
int b)
601 return BitShifter<Packet>{}.template srl<_b>(a, b);
604 template<
int b,
typename Packet>
605 EIGEN_STRONG_INLINE Packet psll64(
const Packet& a)
607 return BitShifter<Packet>{}.template sll64<b>(a);
610 template<
int b,
typename Packet>
611 EIGEN_STRONG_INLINE Packet psrl64(
const Packet& a)
613 return BitShifter<Packet>{}.template srl64<b>(a);