EigenRand  0.5.0
 
Loading...
Searching...
No Matches
MorePacketMath.h
Go to the documentation of this file.
1
12#ifndef EIGENRAND_MORE_PACKET_MATH_H
13#define EIGENRAND_MORE_PACKET_MATH_H
14
15#include <Eigen/Dense>
16
17#define EIGENRAND_PRINT_PACKET(p) do { using _MTy = typename std::remove_const<typename std::remove_reference<decltype(p)>::type>::type; typename std::conditional<Eigen::internal::IsFloatPacket<_MTy>::value, float, typename std::conditional<Eigen::internal::IsDoublePacket<_MTy>::value, double, int>::type>::type f[4]; Eigen::internal::pstore(f, p); std::cout << #p " " << f[0] << " " << f[1] << " " << f[2] << " " << f[3] << std::endl; } while(0)
18
19namespace Eigen
20{
21 namespace internal
22 {
23 template<typename Ty>
24 struct IsIntPacket : std::false_type {};
25
26 template<typename Ty>
27 struct IsFloatPacket : std::false_type {};
28
29 template<typename Ty>
30 struct IsDoublePacket : std::false_type {};
31
32 template<typename Ty>
33 struct HalfPacket;
34
35 template<typename Packet>
36 struct reinterpreter{};
37
38 template<typename Packet>
39 inline auto reinterpret_to_float(const Packet& x)
40 -> decltype(reinterpreter<Packet>{}.to_float(x))
41 {
42 return reinterpreter<Packet>{}.to_float(x);
43 }
44
45 template<typename Packet>
46 inline auto reinterpret_to_double(const Packet& x)
47 -> decltype(reinterpreter<Packet>{}.to_double(x))
48 {
49 return reinterpreter<Packet>{}.to_double(x);
50 }
51
52 template<typename Packet>
53 inline auto reinterpret_to_int(const Packet& x)
54 -> decltype(reinterpreter<Packet>{}.to_int(x))
55 {
56 return reinterpreter<Packet>{}.to_int(x);
57 }
58
59 template<typename Packet>
60 EIGEN_STRONG_INLINE void split_two(const Packet& p, typename HalfPacket<Packet>::type& a, typename HalfPacket<Packet>::type& b);
61
62 template<typename Packet>
63 EIGEN_STRONG_INLINE Packet pseti64(uint64_t a);
64
65 template<typename Packet>
66 EIGEN_STRONG_INLINE Packet padd64(const Packet& a, const Packet& b);
67
68 template<typename Packet>
69 EIGEN_STRONG_INLINE Packet psub64(const Packet& a, const Packet& b);
70
71 template <typename SrcPacket, typename TgtPacket>
72 EIGEN_STRONG_INLINE TgtPacket pcast64(const SrcPacket& a);
73
74 template<typename Packet>
75 EIGEN_STRONG_INLINE Packet pcmpeq(const Packet& a, const Packet& b);
76
77 template<typename Packet>
78 struct BitShifter {};
79
80 template<int b, typename Packet>
81 EIGEN_STRONG_INLINE Packet psll(const Packet& a);
82
83 template<int _b, typename Packet>
84 EIGEN_STRONG_INLINE Packet psrl(const Packet& a, int b = _b);
85
86 template<int b, typename Packet>
87 EIGEN_STRONG_INLINE Packet psll64(const Packet& a);
88
89 template<int b, typename Packet>
90 EIGEN_STRONG_INLINE Packet psrl64(const Packet& a);
91
92 template<typename Packet>
93 EIGEN_STRONG_INLINE bool predux_all(const Packet& a);
94
95 template<typename Packet>
96 EIGEN_STRONG_INLINE bool predux_any(const Packet& a);
97
98 /*template<typename Packet>
99 EIGEN_STRONG_INLINE Packet psll(const Packet& a, int b);
100
101 template<typename Packet>
102 EIGEN_STRONG_INLINE Packet psrl(const Packet& a, int b);
103
104 template<typename Packet>
105 EIGEN_STRONG_INLINE Packet psll64(const Packet& a, int b);
106
107 template<typename Packet>
108 EIGEN_STRONG_INLINE Packet psrl64(const Packet& a, int b);*/
109
110 template<typename Packet>
111 EIGEN_STRONG_INLINE int pmovemask(const Packet& a);
112
113 template<typename Packet>
114 EIGEN_STRONG_INLINE typename std::enable_if<
115 IsFloatPacket<Packet>::value, Packet
116 >::type pext_sign(const Packet& a)
117 {
118 using IntPacket = decltype(reinterpret_to_int(a));
119 return reinterpret_to_float(
120 pand(reinterpret_to_int(a), pset1<IntPacket>(0x80000000))
121 );
122 }
123
124 template<typename Packet>
125 EIGEN_STRONG_INLINE typename std::enable_if<
126 IsDoublePacket<Packet>::value, Packet
127 >::type pext_sign(const Packet& a)
128 {
129 using IntPacket = decltype(reinterpret_to_int(a));
130 return reinterpret_to_double(
131 pand(reinterpret_to_int(a), pseti64<IntPacket>(0x8000000000000000))
132 );
133 }
134
135 /*template<>
136 EIGEN_STRONG_INLINE uint64_t psll64<uint64_t>(const uint64_t& a, int b)
137 {
138 return a << b;
139 }
140
141 template<>
142 EIGEN_STRONG_INLINE uint64_t psrl64<uint64_t>(const uint64_t& a, int b)
143 {
144 return a >> b;
145 }*/
146
147 // approximation : lgamma(z) ~= (z+2.5)ln(z+3) - z - 3 + 0.5 ln (2pi) + 1/12/(z + 3) - ln (z(z+1)(z+2))
148 template<typename Packet>
149 EIGEN_STRONG_INLINE Packet plgamma_approx(const Packet& x)
150 {
151 auto x_3 = padd(x, pset1<Packet>(3));
152 auto ret = pmul(padd(x_3, pset1<Packet>(-0.5)), plog(x_3));
153 ret = psub(ret, x_3);
154 ret = padd(ret, pset1<Packet>(0.9189385332046727));
155 ret = padd(ret, pdiv(pset1<Packet>(1 / 12.), x_3));
156 ret = psub(ret, plog(pmul(
157 pmul(psub(x_3, pset1<Packet>(1)), psub(x_3, pset1<Packet>(2))), x)));
158 return ret;
159 }
160
161 template<typename Packet>
162 EIGEN_STRONG_INLINE Packet pcmplt(const Packet& a, const Packet& b);
163
164 template<typename Packet>
165 EIGEN_STRONG_INLINE Packet pcmple(const Packet& a, const Packet& b);
166
167 template<typename Packet>
168 EIGEN_STRONG_INLINE Packet pbitnot(const Packet& a);
169
170 template<typename PacketIf, typename Packet>
171 EIGEN_STRONG_INLINE Packet pblendv(const PacketIf& ifPacket, const Packet& thenPacket, const Packet& elsePacket);
172
173 template<typename Packet>
174 EIGEN_STRONG_INLINE Packet pgather(const int* addr, const Packet& index);
175
176 template<typename Packet>
177 EIGEN_STRONG_INLINE auto pgather(const float* addr, const Packet& index) -> decltype(reinterpret_to_float(std::declval<Packet>()));
178
179 template<typename Packet>
180 EIGEN_STRONG_INLINE auto pgather(const double* addr, const Packet& index, bool upperhalf = false) -> decltype(reinterpret_to_double(std::declval<Packet>()));
181
182 template<typename Packet>
183 EIGEN_STRONG_INLINE Packet ptruncate(const Packet& a);
184
185 template<typename Packet>
186 EIGEN_STRONG_INLINE Packet pcmpeq64(const Packet& a, const Packet& b);
187
188 template<typename Packet>
189 EIGEN_STRONG_INLINE Packet pcmplt64(const Packet& a, const Packet& b);
190
191 template<typename Packet>
192 EIGEN_STRONG_INLINE Packet pmuluadd64(const Packet& a, uint64_t b, uint64_t c);
193
194 template<typename IntPacket>
195 EIGEN_STRONG_INLINE auto bit_to_ur_float(const IntPacket& x) -> decltype(reinterpret_to_float(x))
196 {
197 using FloatPacket = decltype(reinterpret_to_float(x));
198
199 const IntPacket lower = pset1<IntPacket>(0x7FFFFF),
200 upper = pset1<IntPacket>(127 << 23);
201 const FloatPacket one = pset1<FloatPacket>(1);
202
203 return psub(reinterpret_to_float(por(pand(x, lower), upper)), one);
204 }
205
206 template<typename IntPacket>
207 EIGEN_STRONG_INLINE auto bit_to_ur_double(const IntPacket& x) -> decltype(reinterpret_to_double(x))
208 {
209 using DoublePacket = decltype(reinterpret_to_double(x));
210
211 const IntPacket lower = pseti64<IntPacket>(0xFFFFFFFFFFFFFull),
212 upper = pseti64<IntPacket>(1023ull << 52);
213 const DoublePacket one = pset1<DoublePacket>(1);
214
215 return psub(reinterpret_to_double(por(pand(x, lower), upper)), one);
216 }
217
218 template<typename Packet>
219 EIGEN_STRONG_INLINE Packet pnew_andnot(const Packet& a, const Packet& b)
220 {
221#if defined(EIGEN_VECTORIZE_NEON) || defined(EIGENRAND_EIGEN_34_MODE)
222 return pandnot(a, b);
223#else
224 return pandnot(b, a);
225#endif
226 }
227
228 template<typename _Scalar>
229 struct BitScalar;
230
231 template<>
232 struct BitScalar<float>
233 {
234 float to_ur(uint32_t x)
235 {
236 union
237 {
238 uint32_t u;
239 float f;
240 };
241 u = (x & 0x7FFFFF) | (127 << 23);
242 return f - 1.f;
243 }
244
245 float to_nzur(uint32_t x)
246 {
247 return to_ur(x) + std::numeric_limits<float>::epsilon() / 8;
248 }
249 };
250
251 template<>
252 struct BitScalar<double>
253 {
254 double to_ur(uint64_t x)
255 {
256 union
257 {
258 uint64_t u;
259 double f;
260 };
261 u = (x & 0xFFFFFFFFFFFFFull) | (1023ull << 52);
262 return f - 1.;
263 }
264
265 double to_nzur(uint64_t x)
266 {
267 return to_ur(x) + std::numeric_limits<double>::epsilon() / 8;
268 }
269 };
270
271
272 struct float2
273 {
274 float f[2];
275 };
276
277 EIGEN_STRONG_INLINE float2 bit_to_ur_float(uint64_t x)
278 {
279 BitScalar<float> bs;
280 float2 ret;
281 ret.f[0] = bs.to_ur(x & 0xFFFFFFFF);
282 ret.f[1] = bs.to_ur(x >> 32);
283 return ret;
284 }
285
286 template<typename Packet>
287 EIGEN_STRONG_INLINE typename std::enable_if<
288 IsFloatPacket<Packet>::value
289 >::type psincos(Packet x, Packet& s, Packet& c)
290 {
291 Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
292 using IntPacket = decltype(reinterpret_to_int(x));
293 IntPacket emm0, emm2, emm4;
294
295 sign_bit_sin = x;
296 /* take the absolute value */
297 x = pabs(x);
298 /* extract the sign bit (upper one) */
299 sign_bit_sin = pext_sign(sign_bit_sin);
300
301 /* scale by 4/Pi */
302 y = pmul(x, pset1<Packet>(1.27323954473516));
303
304 /* store the integer part of y in emm2 */
305 emm2 = pcast<Packet, IntPacket>(y);
306
307 /* j=(j+1) & (~1) (see the cephes sources) */
308 emm2 = padd(emm2, pset1<IntPacket>(1));
309 emm2 = pand(emm2, pset1<IntPacket>(~1));
310 y = pcast<IntPacket, Packet>(emm2);
311
312 emm4 = emm2;
313
314 /* get the swap sign flag for the sine */
315 emm0 = pand(emm2, pset1<IntPacket>(4));
316 emm0 = psll<29>(emm0);
317 Packet swap_sign_bit_sin = reinterpret_to_float(emm0);
318
319 /* get the polynom selection mask for the sine*/
320 emm2 = pand(emm2, pset1<IntPacket>(2));
321
322 emm2 = pcmpeq(emm2, pset1<IntPacket>(0));
323 Packet poly_mask = reinterpret_to_float(emm2);
324
325 /* The magic pass: "Extended precision modular arithmetic"
326 x = ((x - y * DP1) - y * DP2) - y * DP3; */
327 xmm1 = pset1<Packet>(-0.78515625);
328 xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
329 xmm3 = pset1<Packet>(-3.77489497744594108e-8);
330 xmm1 = pmul(y, xmm1);
331 xmm2 = pmul(y, xmm2);
332 xmm3 = pmul(y, xmm3);
333 x = padd(x, xmm1);
334 x = padd(x, xmm2);
335 x = padd(x, xmm3);
336
337 emm4 = psub(emm4, pset1<IntPacket>(2));
338 emm4 = pnew_andnot(pset1<IntPacket>(4), emm4);
339 emm4 = psll<29>(emm4);
340 Packet sign_bit_cos = reinterpret_to_float(emm4);
341 sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
342
343
344 /* Evaluate the first polynom (0 <= x <= Pi/4) */
345 Packet z = pmul(x, x);
346 y = pset1<Packet>(2.443315711809948E-005);
347
348 y = pmul(y, z);
349 y = padd(y, pset1<Packet>(-1.388731625493765E-003));
350 y = pmul(y, z);
351 y = padd(y, pset1<Packet>(4.166664568298827E-002));
352 y = pmul(y, z);
353 y = pmul(y, z);
354 Packet tmp = pmul(z, pset1<Packet>(0.5));
355 y = psub(y, tmp);
356 y = padd(y, pset1<Packet>(1));
357
358 /* Evaluate the second polynom (Pi/4 <= x <= 0) */
359
360 Packet y2 = pset1<Packet>(-1.9515295891E-4);
361 y2 = pmul(y2, z);
362 y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
363 y2 = pmul(y2, z);
364 y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
365 y2 = pmul(y2, z);
366 y2 = pmul(y2, x);
367 y2 = padd(y2, x);
368
369 /* select the correct result from the two polynoms */
370 xmm3 = poly_mask;
371 Packet ysin2 = pand(xmm3, y2);
372 Packet ysin1 = pnew_andnot(y, xmm3);
373 y2 = psub(y2, ysin2);
374 y = psub(y, ysin1);
375
376 xmm1 = padd(ysin1, ysin2);
377 xmm2 = padd(y, y2);
378
379 /* update the sign */
380 s = pxor(xmm1, sign_bit_sin);
381 c = pxor(xmm2, sign_bit_cos);
382 }
383
384 template<typename Packet>
385 EIGEN_STRONG_INLINE typename std::enable_if<
386 IsDoublePacket<Packet>::value
387 >::type psincos(Packet x, Packet& s, Packet& c)
388 {
389 Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
390 using IntPacket = decltype(reinterpret_to_int(x));
391 IntPacket emm0, emm2, emm4;
392
393 sign_bit_sin = x;
394 /* take the absolute value */
395 x = pabs(x);
396 /* extract the sign bit (upper one) */
397 sign_bit_sin = pext_sign(sign_bit_sin);
398
399 /* scale by 4/Pi */
400 y = pmul(x, pset1<Packet>(1.27323954473516));
401
402 /* store the integer part of y in emm2 */
403 emm2 = pcast64<Packet, IntPacket>(y);
404
405 /* j=(j+1) & (~1) (see the cephes sources) */
406 emm2 = padd64(emm2, pseti64<IntPacket>(1));
407 emm2 = pand(emm2, pseti64<IntPacket>(~1ll));
408 y = pcast64<IntPacket, Packet>(emm2);
409
410 emm4 = emm2;
411
412 /* get the swap sign flag for the sine */
413 emm0 = pand(emm2, pseti64<IntPacket>(4));
414 emm0 = psll64<61>(emm0);
415 Packet swap_sign_bit_sin = reinterpret_to_double(emm0);
416
417 /* get the polynom selection mask for the sine*/
418 emm2 = pand(emm2, pseti64<IntPacket>(2));
419
420 emm2 = pcmpeq64(emm2, pseti64<IntPacket>(0));
421 Packet poly_mask = reinterpret_to_double(emm2);
422
423 /* The magic pass: "Extended precision modular arithmetic"
424 x = ((x - y * DP1) - y * DP2) - y * DP3; */
425 xmm1 = pset1<Packet>(-0.78515625);
426 xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
427 xmm3 = pset1<Packet>(-3.77489497744594108e-8);
428 xmm1 = pmul(y, xmm1);
429 xmm2 = pmul(y, xmm2);
430 xmm3 = pmul(y, xmm3);
431 x = padd(x, xmm1);
432 x = padd(x, xmm2);
433 x = padd(x, xmm3);
434
435 emm4 = psub64(emm4, pseti64<IntPacket>(2));
436 emm4 = pnew_andnot(pseti64<IntPacket>(4), emm4);
437 emm4 = psll64<61>(emm4);
438 Packet sign_bit_cos = reinterpret_to_double(emm4);
439 sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
440
441
442 /* Evaluate the first polynom (0 <= x <= Pi/4) */
443 Packet z = pmul(x, x);
444 y = pset1<Packet>(2.443315711809948E-005);
445
446 y = pmul(y, z);
447 y = padd(y, pset1<Packet>(-1.388731625493765E-003));
448 y = pmul(y, z);
449 y = padd(y, pset1<Packet>(4.166664568298827E-002));
450 y = pmul(y, z);
451 y = pmul(y, z);
452 Packet tmp = pmul(z, pset1<Packet>(0.5));
453 y = psub(y, tmp);
454 y = padd(y, pset1<Packet>(1));
455
456 /* Evaluate the second polynom (Pi/4 <= x <= 0) */
457
458 Packet y2 = pset1<Packet>(-1.9515295891E-4);
459 y2 = pmul(y2, z);
460 y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
461 y2 = pmul(y2, z);
462 y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
463 y2 = pmul(y2, z);
464 y2 = pmul(y2, x);
465 y2 = padd(y2, x);
466
467 /* select the correct result from the two polynoms */
468 xmm3 = poly_mask;
469 Packet ysin2 = pand(xmm3, y2);
470 Packet ysin1 = pnew_andnot(y, xmm3);
471 y2 = psub(y2, ysin2);
472 y = psub(y, ysin1);
473
474 xmm1 = padd(ysin1, ysin2);
475 xmm2 = padd(y, y2);
476
477 /* update the sign */
478 s = pxor(xmm1, sign_bit_sin);
479 c = pxor(xmm2, sign_bit_cos);
480 }
481
482 template<typename Packet>
483 EIGEN_STRONG_INLINE typename std::enable_if<
484 IsDoublePacket<Packet>::value, Packet
485 >::type _psin(Packet x)
486 {
487 Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
488 using IntPacket = decltype(reinterpret_to_int(x));
489 IntPacket emm0, emm2;
490
491 sign_bit_sin = x;
492 /* take the absolute value */
493 x = pabs(x);
494 /* extract the sign bit (upper one) */
495 sign_bit_sin = pext_sign(sign_bit_sin);
496
497 /* scale by 4/Pi */
498 y = pmul(x, pset1<Packet>(1.27323954473516));
499
500 /* store the integer part of y in emm2 */
501 emm2 = pcast64<Packet, IntPacket>(y);
502
503 /* j=(j+1) & (~1) (see the cephes sources) */
504 emm2 = padd64(emm2, pseti64<IntPacket>(1));
505 emm2 = pand(emm2, pseti64<IntPacket>(~1ll));
506 y = pcast64<IntPacket, Packet>(emm2);
507
508 /* get the swap sign flag for the sine */
509 emm0 = pand(emm2, pseti64<IntPacket>(4));
510 emm0 = psll64<61>(emm0);
511 Packet swap_sign_bit_sin = reinterpret_to_double(emm0);
512
513 /* get the polynom selection mask for the sine*/
514 emm2 = pand(emm2, pseti64<IntPacket>(2));
515
516 emm2 = pcmpeq64(emm2, pseti64<IntPacket>(0));
517 Packet poly_mask = reinterpret_to_double(emm2);
518
519 /* The magic pass: "Extended precision modular arithmetic"
520 x = ((x - y * DP1) - y * DP2) - y * DP3; */
521 xmm1 = pset1<Packet>(-0.78515625);
522 xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
523 xmm3 = pset1<Packet>(-3.77489497744594108e-8);
524 xmm1 = pmul(y, xmm1);
525 xmm2 = pmul(y, xmm2);
526 xmm3 = pmul(y, xmm3);
527 x = padd(x, xmm1);
528 x = padd(x, xmm2);
529 x = padd(x, xmm3);
530
531 sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
532
533
534 /* Evaluate the first polynom (0 <= x <= Pi/4) */
535 Packet z = pmul(x, x);
536 y = pset1<Packet>(2.443315711809948E-005);
537
538 y = pmul(y, z);
539 y = padd(y, pset1<Packet>(-1.388731625493765E-003));
540 y = pmul(y, z);
541 y = padd(y, pset1<Packet>(4.166664568298827E-002));
542 y = pmul(y, z);
543 y = pmul(y, z);
544 Packet tmp = pmul(z, pset1<Packet>(0.5));
545 y = psub(y, tmp);
546 y = padd(y, pset1<Packet>(1));
547
548 /* Evaluate the second polynom (Pi/4 <= x <= 0) */
549
550 Packet y2 = pset1<Packet>(-1.9515295891E-4);
551 y2 = pmul(y2, z);
552 y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
553 y2 = pmul(y2, z);
554 y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
555 y2 = pmul(y2, z);
556 y2 = pmul(y2, x);
557 y2 = padd(y2, x);
558
559 /* select the correct result from the two polynoms */
560 xmm3 = poly_mask;
561 Packet ysin2 = pand(xmm3, y2);
562 Packet ysin1 = pnew_andnot(y, xmm3);
563
564 xmm1 = padd(ysin1, ysin2);
565
566 /* update the sign */
567 return pxor(xmm1, sign_bit_sin);
568 }
569 }
570}
571
572#ifdef EIGEN_VECTORIZE_AVX
574#endif
575
576#ifdef EIGEN_VECTORIZE_SSE2
578#endif
579
580#ifdef EIGEN_VECTORIZE_NEON
582#endif
583
584namespace Eigen
585{
586 namespace internal
587 {
588 template<int b, typename Packet>
589 EIGEN_STRONG_INLINE Packet psll(const Packet& a)
590 {
591 return BitShifter<Packet>{}.template sll<b>(a);
592 }
593
594 template<int _b, typename Packet>
595 EIGEN_STRONG_INLINE Packet psrl(const Packet& a, int b)
596 {
597 return BitShifter<Packet>{}.template srl<_b>(a, b);
598 }
599
600 template<int b, typename Packet>
601 EIGEN_STRONG_INLINE Packet psll64(const Packet& a)
602 {
603 return BitShifter<Packet>{}.template sll64<b>(a);
604 }
605
606 template<int b, typename Packet>
607 EIGEN_STRONG_INLINE Packet psrl64(const Packet& a)
608 {
609 return BitShifter<Packet>{}.template srl64<b>(a);
610 }
611 }
612}
613
614#endif