12#ifndef EIGENRAND_MORE_PACKET_MATH_H 
   13#define EIGENRAND_MORE_PACKET_MATH_H 
   18#define EIGENRAND_PRINT_PACKET(p) do { using _MTy = typename std::remove_const<typename std::remove_reference<decltype(p)>::type>::type; typename std::conditional<Eigen::internal::IsFloatPacket<_MTy>::value, float, typename std::conditional<Eigen::internal::IsDoublePacket<_MTy>::value, double, int>::type>::type f[4]; Eigen::internal::pstore(f, p); std::cout << #p " " << f[0] << " " << f[1] << " " << f[2] << " " << f[3] << std::endl; } while(0) 
   25        struct IsIntPacket : std::false_type {};
 
   28        struct IsFloatPacket : std::false_type {};
 
   31        struct IsDoublePacket : std::false_type {};
 
   36        template<
typename Packet>
 
   37        struct reinterpreter{};
 
   39        template<
typename Packet>
 
   40        inline auto reinterpret_to_float(
const Packet& x)
 
   41            -> 
decltype(reinterpreter<Packet>{}.to_float(x))
 
   43            return reinterpreter<Packet>{}.to_float(x);
 
   46        template<
typename Packet>
 
   47        inline auto reinterpret_to_double(
const Packet& x)
 
   48            -> 
decltype(reinterpreter<Packet>{}.to_double(x))
 
   50            return reinterpreter<Packet>{}.to_double(x);
 
   53        template<
typename Packet>
 
   54        inline auto reinterpret_to_int(
const Packet& x)
 
   55            -> 
decltype(reinterpreter<Packet>{}.to_int(x))
 
   57            return reinterpreter<Packet>{}.to_int(x);
 
   60        template<
typename Packet>
 
   61        EIGEN_STRONG_INLINE 
void split_two(
const Packet& p, 
typename HalfPacket<Packet>::type& a, 
typename HalfPacket<Packet>::type& b);
 
   63        template<
typename Packet>
 
   64        EIGEN_STRONG_INLINE Packet pseti64(uint64_t a);
 
   66        template<
typename Packet>
 
   67        EIGEN_STRONG_INLINE Packet padd64(
const Packet& a, 
const Packet& b);
 
   69        template<
typename Packet>
 
   70        EIGEN_STRONG_INLINE Packet psub64(
const Packet& a, 
const Packet& b);
 
   72        template <
typename SrcPacket, 
typename TgtPacket>
 
   73        EIGEN_STRONG_INLINE TgtPacket pcast64(
const SrcPacket& a);
 
   75        template<
typename Packet>
 
   76        EIGEN_STRONG_INLINE Packet pcmpeq(
const Packet& a, 
const Packet& b);
 
   78        template<
typename Packet>
 
   81        template<
int b, 
typename Packet>
 
   82        EIGEN_STRONG_INLINE Packet psll(
const Packet& a);
 
   84        template<
int _b, 
typename Packet>
 
   85        EIGEN_STRONG_INLINE Packet psrl(
const Packet& a, 
int b = _b);
 
   87        template<
int b, 
typename Packet>
 
   88        EIGEN_STRONG_INLINE Packet psll64(
const Packet& a);
 
   90        template<
int b, 
typename Packet>
 
   91        EIGEN_STRONG_INLINE Packet psrl64(
const Packet& a);
 
   93        template<
typename Packet> 
 
   94        EIGEN_STRONG_INLINE 
bool predux_all(
const Packet& a);
 
   96        template<
typename Packet>
 
   97        EIGEN_STRONG_INLINE 
bool predux_any(
const Packet& a);
 
  111        template<
typename Packet>
 
  112        EIGEN_STRONG_INLINE 
int pmovemask(
const Packet& a);
 
  114        template<
typename Packet>
 
  115        EIGEN_STRONG_INLINE 
typename std::enable_if<
 
  116            IsFloatPacket<Packet>::value, Packet
 
  117        >::type pext_sign(
const Packet& a)
 
  119            using IntPacket = 
decltype(reinterpret_to_int(a));
 
  120            return reinterpret_to_float(
 
  121                pand(reinterpret_to_int(a), pset1<IntPacket>(0x80000000))
 
  125        template<
typename Packet>
 
  126        EIGEN_STRONG_INLINE 
typename std::enable_if<
 
  127            IsDoublePacket<Packet>::value, Packet
 
  128        >::type pext_sign(
const Packet& a)
 
  130            using IntPacket = 
decltype(reinterpret_to_int(a));
 
  131            return reinterpret_to_double(
 
  132                pand(reinterpret_to_int(a), pseti64<IntPacket>(0x8000000000000000))
 
  149        template<
typename Packet>
 
  150        EIGEN_STRONG_INLINE Packet plgamma_approx(
const Packet& x)
 
  152            auto x_3 = padd(x, pset1<Packet>(3));
 
  153            auto ret = pmul(padd(x_3, pset1<Packet>(-0.5)), plog(x_3));
 
  154            ret = psub(ret, x_3);
 
  155            ret = padd(ret, pset1<Packet>(0.9189385332046727));
 
  156            ret = padd(ret, pdiv(pset1<Packet>(1 / 12.), x_3));
 
  157            ret = psub(ret, plog(pmul(
 
  158                pmul(psub(x_3, pset1<Packet>(1)), psub(x_3, pset1<Packet>(2))), x)));
 
  162        template<
typename Packet>
 
  163        EIGEN_STRONG_INLINE Packet pcmplt(
const Packet& a, 
const Packet& b);
 
  165        template<
typename Packet>
 
  166        EIGEN_STRONG_INLINE Packet pcmple(
const Packet& a, 
const Packet& b);
 
  168        template<
typename Packet>
 
  169        EIGEN_STRONG_INLINE Packet pbitnot(
const Packet& a);
 
  171        template<
typename PacketIf, 
typename Packet>
 
  172        EIGEN_STRONG_INLINE Packet pblendv(
const PacketIf& ifPacket, 
const Packet& thenPacket, 
const Packet& elsePacket);
 
  174        template<
typename Packet>
 
  175        EIGEN_STRONG_INLINE Packet pgather(
const int* addr, 
const Packet& index);
 
  177        template<
typename Packet>
 
  178        EIGEN_STRONG_INLINE 
auto pgather(
const float* addr, 
const Packet& index) -> 
decltype(reinterpret_to_float(std::declval<Packet>()));
 
  180        template<
typename Packet>
 
  181        EIGEN_STRONG_INLINE 
auto pgather(
const double* addr, 
const Packet& index, 
bool upperhalf = 
false) -> 
decltype(reinterpret_to_double(std::declval<Packet>()));
 
  183        template<
typename Packet>
 
  184        EIGEN_STRONG_INLINE Packet ptruncate(
const Packet& a);
 
  186        template<
typename Packet>
 
  187        EIGEN_STRONG_INLINE Packet pcmpeq64(
const Packet& a, 
const Packet& b);
 
  189        template<
typename Packet>
 
  190        EIGEN_STRONG_INLINE Packet pcmplt64(
const Packet& a, 
const Packet& b);
 
  192        template<
typename Packet>
 
  193        EIGEN_STRONG_INLINE Packet pmuluadd64(
const Packet& a, uint64_t b, uint64_t c);
 
  195        template<
typename IntPacket>
 
  196        EIGEN_STRONG_INLINE 
auto bit_to_ur_float(
const IntPacket& x) -> 
decltype(reinterpret_to_float(x))
 
  198            using FloatPacket = 
decltype(reinterpret_to_float(x));
 
  200            const IntPacket lower = pset1<IntPacket>(0x7FFFFF),
 
  201                upper = pset1<IntPacket>(127 << 23);
 
  202            const FloatPacket one = pset1<FloatPacket>(1);
 
  204            return psub(reinterpret_to_float(por(pand(x, lower), upper)), one);
 
  207        template<
typename IntPacket>
 
  208        EIGEN_STRONG_INLINE 
auto bit_to_ur_double(
const IntPacket& x) -> 
decltype(reinterpret_to_double(x))
 
  210            using DoublePacket = 
decltype(reinterpret_to_double(x));
 
  212            const IntPacket lower = pseti64<IntPacket>(0xFFFFFFFFFFFFFull),
 
  213                upper = pseti64<IntPacket>(1023ull << 52);
 
  214            const DoublePacket one = pset1<DoublePacket>(1);
 
  216            return psub(reinterpret_to_double(por(pand(x, lower), upper)), one);
 
  219        template<
typename Packet>
 
  220        EIGEN_STRONG_INLINE Packet pnew_andnot(
const Packet& a, 
const Packet& b)
 
  222#if defined(EIGEN_VECTORIZE_NEON) || defined(EIGENRAND_EIGEN_34_MODE) 
  223            return pandnot(a, b);
 
  225            return pandnot(b, a);
 
  229        template<
typename _Scalar>
 
  233        struct BitScalar<float>
 
  235            float to_ur(uint32_t x)
 
  242                u = (x & 0x7FFFFF) | (127 << 23);
 
  246            float to_nzur(uint32_t x)
 
  248                return to_ur(x) + std::numeric_limits<float>::epsilon() / 8;
 
  253        struct BitScalar<double>
 
  255            double to_ur(uint64_t x)
 
  262                u = (x & 0xFFFFFFFFFFFFFull) | (1023ull << 52);
 
  266            double to_nzur(uint64_t x)
 
  268                return to_ur(x) + std::numeric_limits<double>::epsilon() / 8;
 
  278        EIGEN_STRONG_INLINE float2 bit_to_ur_float(uint64_t x)
 
  282            ret.f[0] = bs.to_ur(x & 0xFFFFFFFF);
 
  283            ret.f[1] = bs.to_ur(x >> 32);
 
  287        template<
typename Packet>
 
  288        EIGEN_STRONG_INLINE 
typename std::enable_if<
 
  289            IsFloatPacket<Packet>::value
 
  290        >::type psincos(Packet x, Packet& s, Packet& c)
 
  292            Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
 
  293            using IntPacket = 
decltype(reinterpret_to_int(x));
 
  294            IntPacket emm0, emm2, emm4;
 
  300            sign_bit_sin = pext_sign(sign_bit_sin);
 
  303            y = pmul(x, pset1<Packet>(1.27323954473516));
 
  306            emm2 = pcast<Packet, IntPacket>(y);
 
  309            emm2 = padd(emm2, pset1<IntPacket>(1));
 
  310            emm2 = pand(emm2, pset1<IntPacket>(~1));
 
  311            y = pcast<IntPacket, Packet>(emm2);
 
  316            emm0 = pand(emm2, pset1<IntPacket>(4));
 
  317            emm0 = psll<29>(emm0);
 
  318            Packet swap_sign_bit_sin = reinterpret_to_float(emm0);
 
  321            emm2 = pand(emm2, pset1<IntPacket>(2));
 
  323            emm2 = pcmpeq(emm2, pset1<IntPacket>(0));
 
  324            Packet poly_mask = reinterpret_to_float(emm2);
 
  328            xmm1 = pset1<Packet>(-0.78515625);
 
  329            xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
 
  330            xmm3 = pset1<Packet>(-3.77489497744594108e-8);
 
  331            xmm1 = pmul(y, xmm1);
 
  332            xmm2 = pmul(y, xmm2);
 
  333            xmm3 = pmul(y, xmm3);
 
  338            emm4 = psub(emm4, pset1<IntPacket>(2));
 
  339            emm4 = pnew_andnot(pset1<IntPacket>(4), emm4);
 
  340            emm4 = psll<29>(emm4);
 
  341            Packet sign_bit_cos = reinterpret_to_float(emm4);
 
  342            sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
 
  346            Packet z = pmul(x, x);
 
  347            y = pset1<Packet>(2.443315711809948E-005);
 
  350            y = padd(y, pset1<Packet>(-1.388731625493765E-003));
 
  352            y = padd(y, pset1<Packet>(4.166664568298827E-002));
 
  355            Packet tmp = pmul(z, pset1<Packet>(0.5));
 
  357            y = padd(y, pset1<Packet>(1));
 
  361            Packet y2 = pset1<Packet>(-1.9515295891E-4);
 
  363            y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
 
  365            y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
 
  372            Packet ysin2 = pand(xmm3, y2);
 
  373            Packet ysin1 = pnew_andnot(y, xmm3);
 
  374            y2 = psub(y2, ysin2);
 
  377            xmm1 = padd(ysin1, ysin2);
 
  381            s = pxor(xmm1, sign_bit_sin);
 
  382            c = pxor(xmm2, sign_bit_cos);
 
  385        template<
typename Packet>
 
  386        EIGEN_STRONG_INLINE 
typename std::enable_if<
 
  387            IsDoublePacket<Packet>::value
 
  388        >::type psincos(Packet x, Packet& s, Packet& c)
 
  390            Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
 
  391            using IntPacket = 
decltype(reinterpret_to_int(x));
 
  392            IntPacket emm0, emm2, emm4;
 
  398            sign_bit_sin = pext_sign(sign_bit_sin);
 
  401            y = pmul(x, pset1<Packet>(1.27323954473516));
 
  404            emm2 = pcast64<Packet, IntPacket>(y);
 
  407            emm2 = padd64(emm2, pseti64<IntPacket>(1));
 
  408            emm2 = pand(emm2, pseti64<IntPacket>(~1ll));
 
  409            y = pcast64<IntPacket, Packet>(emm2);
 
  414            emm0 = pand(emm2, pseti64<IntPacket>(4));
 
  415            emm0 = psll64<61>(emm0);
 
  416            Packet swap_sign_bit_sin = reinterpret_to_double(emm0);
 
  419            emm2 = pand(emm2, pseti64<IntPacket>(2));
 
  421            emm2 = pcmpeq64(emm2, pseti64<IntPacket>(0));
 
  422            Packet poly_mask = reinterpret_to_double(emm2);
 
  426            xmm1 = pset1<Packet>(-0.78515625);
 
  427            xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
 
  428            xmm3 = pset1<Packet>(-3.77489497744594108e-8);
 
  429            xmm1 = pmul(y, xmm1);
 
  430            xmm2 = pmul(y, xmm2);
 
  431            xmm3 = pmul(y, xmm3);
 
  436            emm4 = psub64(emm4, pseti64<IntPacket>(2));
 
  437            emm4 = pnew_andnot(pseti64<IntPacket>(4), emm4);
 
  438            emm4 = psll64<61>(emm4);
 
  439            Packet sign_bit_cos = reinterpret_to_double(emm4);
 
  440            sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
 
  444            Packet z = pmul(x, x);
 
  445            y = pset1<Packet>(2.443315711809948E-005);
 
  448            y = padd(y, pset1<Packet>(-1.388731625493765E-003));
 
  450            y = padd(y, pset1<Packet>(4.166664568298827E-002));
 
  453            Packet tmp = pmul(z, pset1<Packet>(0.5));
 
  455            y = padd(y, pset1<Packet>(1));
 
  459            Packet y2 = pset1<Packet>(-1.9515295891E-4);
 
  461            y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
 
  463            y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
 
  470            Packet ysin2 = pand(xmm3, y2);
 
  471            Packet ysin1 = pnew_andnot(y, xmm3);
 
  472            y2 = psub(y2, ysin2);
 
  475            xmm1 = padd(ysin1, ysin2);
 
  479            s = pxor(xmm1, sign_bit_sin);
 
  480            c = pxor(xmm2, sign_bit_cos);
 
  483        template<
typename Packet>
 
  484        EIGEN_STRONG_INLINE 
typename std::enable_if<
 
  485            IsDoublePacket<Packet>::value, Packet
 
  486        >::type _psin(Packet x)
 
  488            Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
 
  489            using IntPacket = 
decltype(reinterpret_to_int(x));
 
  490            IntPacket emm0, emm2;
 
  496            sign_bit_sin = pext_sign(sign_bit_sin);
 
  499            y = pmul(x, pset1<Packet>(1.27323954473516));
 
  502            emm2 = pcast64<Packet, IntPacket>(y);
 
  505            emm2 = padd64(emm2, pseti64<IntPacket>(1));
 
  506            emm2 = pand(emm2, pseti64<IntPacket>(~1ll));
 
  507            y = pcast64<IntPacket, Packet>(emm2);
 
  510            emm0 = pand(emm2, pseti64<IntPacket>(4));
 
  511            emm0 = psll64<61>(emm0);
 
  512            Packet swap_sign_bit_sin = reinterpret_to_double(emm0);
 
  515            emm2 = pand(emm2, pseti64<IntPacket>(2));
 
  517            emm2 = pcmpeq64(emm2, pseti64<IntPacket>(0));
 
  518            Packet poly_mask = reinterpret_to_double(emm2);
 
  522            xmm1 = pset1<Packet>(-0.78515625);
 
  523            xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
 
  524            xmm3 = pset1<Packet>(-3.77489497744594108e-8);
 
  525            xmm1 = pmul(y, xmm1);
 
  526            xmm2 = pmul(y, xmm2);
 
  527            xmm3 = pmul(y, xmm3);
 
  532            sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
 
  536            Packet z = pmul(x, x);
 
  537            y = pset1<Packet>(2.443315711809948E-005);
 
  540            y = padd(y, pset1<Packet>(-1.388731625493765E-003));
 
  542            y = padd(y, pset1<Packet>(4.166664568298827E-002));
 
  545            Packet tmp = pmul(z, pset1<Packet>(0.5));
 
  547            y = padd(y, pset1<Packet>(1));
 
  551            Packet y2 = pset1<Packet>(-1.9515295891E-4);
 
  553            y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
 
  555            y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
 
  562            Packet ysin2 = pand(xmm3, y2);
 
  563            Packet ysin1 = pnew_andnot(y, xmm3);
 
  565            xmm1 = padd(ysin1, ysin2);
 
  568            return pxor(xmm1, sign_bit_sin);
 
  573#ifdef EIGEN_VECTORIZE_AVX512 
  577#ifdef EIGEN_VECTORIZE_AVX 
  581#ifdef EIGEN_VECTORIZE_SSE2 
  585#ifdef EIGEN_VECTORIZE_NEON 
  593        template<
int b, 
typename Packet>
 
  594        EIGEN_STRONG_INLINE Packet psll(
const Packet& a)
 
  596            return BitShifter<Packet>{}.template sll<b>(a);
 
  599        template<
int _b, 
typename Packet>
 
  600        EIGEN_STRONG_INLINE Packet psrl(
const Packet& a, 
int b)
 
  602            return BitShifter<Packet>{}.template srl<_b>(a, b);
 
  605        template<
int b, 
typename Packet>
 
  606        EIGEN_STRONG_INLINE Packet psll64(
const Packet& a)
 
  608            return BitShifter<Packet>{}.template sll64<b>(a);
 
  611        template<
int b, 
typename Packet>
 
  612        EIGEN_STRONG_INLINE Packet psrl64(
const Packet& a)
 
  614            return BitShifter<Packet>{}.template srl64<b>(a);