12#ifndef EIGENRAND_MORE_PACKET_MATH_AVX_H 
   13#define EIGENRAND_MORE_PACKET_MATH_AVX_H 
   22        struct IsIntPacket<Packet8i> : std::true_type {};
 
   25        struct HalfPacket<Packet8i>
 
   27            using type = Packet4i;
 
   31        struct HalfPacket<Packet8f>
 
   33            using type = Packet4f;
 
   37        struct IsFloatPacket<Packet8f> : std::true_type {};
 
   40        struct IsDoublePacket<Packet4d> : std::true_type {};
 
   43        struct reinterpreter<Packet8i>
 
   45            EIGEN_STRONG_INLINE Packet8f to_float(
const Packet8i& x)
 
   47                return _mm256_castsi256_ps(x);
 
   50            EIGEN_STRONG_INLINE Packet4d to_double(
const Packet8i& x)
 
   52                return _mm256_castsi256_pd(x);
 
   55            EIGEN_STRONG_INLINE Packet8i to_int(
const Packet8i& x)
 
   62        struct reinterpreter<Packet8f>
 
   64            EIGEN_STRONG_INLINE Packet8f to_float(
const Packet8f& x)
 
   69            EIGEN_STRONG_INLINE Packet4d to_double(
const Packet8f& x)
 
   71                return _mm256_castps_pd(x);
 
   74            EIGEN_STRONG_INLINE Packet8i to_int(
const Packet8f& x)
 
   76                return _mm256_castps_si256(x);
 
   81        struct reinterpreter<Packet4d>
 
   83            EIGEN_STRONG_INLINE Packet8f to_float(
const Packet4d& x)
 
   85                return _mm256_castpd_ps(x);
 
   88            EIGEN_STRONG_INLINE Packet4d to_double(
const Packet4d& x)
 
   93            EIGEN_STRONG_INLINE Packet8i to_int(
const Packet4d& x)
 
   95                return _mm256_castpd_si256(x);
 
  100        EIGEN_STRONG_INLINE 
void split_two<Packet8i>(
const Packet8i& x, Packet4i& a, Packet4i& b)
 
  102            a = _mm256_extractf128_si256(x, 0);
 
  103            b = _mm256_extractf128_si256(x, 1);
 
  106        EIGEN_STRONG_INLINE Packet8i combine_two(
const Packet4i& a, 
const Packet4i& b)
 
  108            return _mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1);
 
  112        EIGEN_STRONG_INLINE 
void split_two<Packet8f>(
const Packet8f& x, Packet4f& a, Packet4f& b)
 
  114            a = _mm256_extractf128_ps(x, 0);
 
  115            b = _mm256_extractf128_ps(x, 1);
 
  118        EIGEN_STRONG_INLINE Packet8f combine_two(
const Packet4f& a, 
const Packet4f& b)
 
  120            return _mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1);
 
  124        EIGEN_STRONG_INLINE Packet4i combine_low32(
const Packet8i& a)
 
  126#ifdef EIGEN_VECTORIZE_AVX2 
  127            return _mm256_castsi256_si128(_mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7)));
 
  129            auto sc = _mm256_permutevar_ps(_mm256_castsi256_ps(a), _mm256_setr_epi32(0, 2, 1, 3, 1, 3, 0, 2));
 
  130            return _mm_castps_si128(_mm_blend_ps(_mm256_extractf128_ps(sc, 0), _mm256_extractf128_ps(sc, 1), 0b1100));
 
  135        EIGEN_STRONG_INLINE Packet8i pseti64<Packet8i>(uint64_t a)
 
  137            return _mm256_set1_epi64x(a);
 
  141        EIGEN_STRONG_INLINE Packet8i padd64<Packet8i>(
const Packet8i& a, 
const Packet8i& b)
 
  143#ifdef EIGEN_VECTORIZE_AVX2 
  144            return _mm256_add_epi64(a, b);
 
  146            Packet4i a1, a2, b1, b2;
 
  147            split_two(a, a1, a2);
 
  148            split_two(b, b1, b2);
 
  149            return combine_two((Packet4i)_mm_add_epi64(a1, b1), (Packet4i)_mm_add_epi64(a2, b2));
 
  154        EIGEN_STRONG_INLINE Packet8i psub64<Packet8i>(
const Packet8i& a, 
const Packet8i& b)
 
  156#ifdef EIGEN_VECTORIZE_AVX2 
  157            return _mm256_sub_epi64(a, b);
 
  159            Packet4i a1, a2, b1, b2;
 
  160            split_two(a, a1, a2);
 
  161            split_two(b, b1, b2);
 
  162            return combine_two((Packet4i)_mm_sub_epi64(a1, b1), (Packet4i)_mm_sub_epi64(a2, b2));
 
  167        EIGEN_STRONG_INLINE Packet8i pcmpeq<Packet8i>(
const Packet8i& a, 
const Packet8i& b)
 
  169#ifdef EIGEN_VECTORIZE_AVX2 
  170            return _mm256_cmpeq_epi32(a, b);
 
  172            Packet4i a1, a2, b1, b2;
 
  173            split_two(a, a1, a2);
 
  174            split_two(b, b1, b2);
 
  175            return combine_two((Packet4i)_mm_cmpeq_epi32(a1, b1), (Packet4i)_mm_cmpeq_epi32(a2, b2));
 
  180        EIGEN_STRONG_INLINE Packet8f pcmpeq<Packet8f>(
const Packet8f& a, 
const Packet8f& b)
 
  182            return _mm256_cmp_ps(a, b, _CMP_EQ_OQ);
 
  186        EIGEN_STRONG_INLINE Packet8i pnegate<Packet8i>(
const Packet8i& a)
 
  188#ifdef EIGEN_VECTORIZE_AVX2 
  189            return _mm256_sub_epi32(pset1<Packet8i>(0), a);
 
  192            split_two(a, a1, a2);
 
  193            return combine_two(_mm_sub_epi32(pset1<Packet4i>(0), a1), _mm_sub_epi32(pset1<Packet4i>(0), a2));
 
  198        struct BitShifter<Packet8i>
 
  201            EIGEN_STRONG_INLINE Packet8i sll(
const Packet8i& a)
 
  203#ifdef EIGEN_VECTORIZE_AVX2 
  204                return _mm256_slli_epi32(a, b);
 
  207                split_two(a, a1, a2);
 
  208                return combine_two((Packet4i)_mm_slli_epi32(a1, b), (Packet4i)_mm_slli_epi32(a2, b));
 
  213            EIGEN_STRONG_INLINE Packet8i srl(
const Packet8i& a, 
int _b = b)
 
  215#ifdef EIGEN_VECTORIZE_AVX2 
  218                    return _mm256_srli_epi32(a, b);
 
  222                    return _mm256_srli_epi32(a, _b);
 
  226                split_two(a, a1, a2);
 
  229                    return combine_two((Packet4i)_mm_srli_epi32(a1, b), (Packet4i)_mm_srli_epi32(a2, b));
 
  233                    return combine_two((Packet4i)_mm_srli_epi32(a1, _b), (Packet4i)_mm_srli_epi32(a2, _b));
 
  239            EIGEN_STRONG_INLINE Packet8i sll64(
const Packet8i& a)
 
  241#ifdef EIGEN_VECTORIZE_AVX2 
  242                return _mm256_slli_epi64(a, b);
 
  245                split_two(a, a1, a2);
 
  246                return combine_two((Packet4i)_mm_slli_epi64(a1, b), (Packet4i)_mm_slli_epi64(a2, b));
 
  251            EIGEN_STRONG_INLINE Packet8i srl64(
const Packet8i& a)
 
  253#ifdef EIGEN_VECTORIZE_AVX2 
  254                return _mm256_srli_epi64(a, b);
 
  257                split_two(a, a1, a2);
 
  258                return combine_two((Packet4i)_mm_srli_epi64(a1, b), (Packet4i)_mm_srli_epi64(a2, b));
 
  262#ifdef EIGENRAND_EIGEN_33_MODE 
  263        template<> EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(
const Packet8i& a, 
const Packet8i& b)
 
  265    #ifdef EIGEN_VECTORIZE_AVX2 
  266            return _mm256_add_epi32(a, b);
 
  268            Packet4i a1, a2, b1, b2;
 
  269            split_two(a, a1, a2);
 
  270            split_two(b, b1, b2);
 
  271            return combine_two((Packet4i)_mm_add_epi32(a1, b1), (Packet4i)_mm_add_epi32(a2, b2));
 
  275        template<> EIGEN_STRONG_INLINE Packet8i psub<Packet8i>(
const Packet8i& a, 
const Packet8i& b)
 
  277    #ifdef EIGEN_VECTORIZE_AVX2 
  278            return _mm256_sub_epi32(a, b);
 
  280            Packet4i a1, a2, b1, b2;
 
  281            split_two(a, a1, a2);
 
  282            split_two(b, b1, b2);
 
  283            return combine_two((Packet4i)_mm_sub_epi32(a1, b1), (Packet4i)_mm_sub_epi32(a2, b2));
 
  287        template<> EIGEN_STRONG_INLINE Packet8i pand<Packet8i>(
const Packet8i& a, 
const Packet8i& b)
 
  289    #ifdef EIGEN_VECTORIZE_AVX2 
  290            return _mm256_and_si256(a, b);
 
  292            return reinterpret_to_int((Packet8f)_mm256_and_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
 
  296        template<> EIGEN_STRONG_INLINE Packet8i pandnot<Packet8i>(
const Packet8i& a, 
const Packet8i& b)
 
  298    #ifdef EIGEN_VECTORIZE_AVX2 
  299            return _mm256_andnot_si256(a, b);
 
  301            return reinterpret_to_int((Packet8f)_mm256_andnot_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
 
  305        template<> EIGEN_STRONG_INLINE Packet8i por<Packet8i>(
const Packet8i& a, 
const Packet8i& b)
 
  307    #ifdef EIGEN_VECTORIZE_AVX2 
  308            return _mm256_or_si256(a, b);
 
  310            return reinterpret_to_int((Packet8f)_mm256_or_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
 
  314        template<> EIGEN_STRONG_INLINE Packet8i pxor<Packet8i>(
const Packet8i& a, 
const Packet8i& b)
 
  316    #ifdef EIGEN_VECTORIZE_AVX2 
  317            return _mm256_xor_si256(a, b);
 
  319            return reinterpret_to_int((Packet8f)_mm256_xor_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
 
  323        template<> EIGEN_STRONG_INLINE 
bool predux_any(
const Packet8f& x)
 
  325            return !!_mm256_movemask_ps(x);
 
  328        template<> EIGEN_STRONG_INLINE 
bool predux_any(
const Packet8i& x)
 
  330            return predux_any(_mm256_castsi256_ps(x));
 
  334        template<> EIGEN_STRONG_INLINE 
bool predux_all(
const Packet8f& x)
 
  336            return _mm256_movemask_ps(x) == 0xFF;
 
  339        template<> EIGEN_STRONG_INLINE 
bool predux_all(
const Packet8i& x)
 
  341            return predux_all(_mm256_castsi256_ps(x));
 
  345        EIGEN_STRONG_INLINE Packet8i pcmplt<Packet8i>(
const Packet8i& a, 
const Packet8i& b)
 
  347#ifdef EIGEN_VECTORIZE_AVX2 
  348            return _mm256_cmpgt_epi32(b, a);
 
  350            Packet4i a1, a2, b1, b2;
 
  351            split_two(a, a1, a2);
 
  352            split_two(b, b1, b2);
 
  353            return combine_two((Packet4i)_mm_cmpgt_epi32(b1, a1), (Packet4i)_mm_cmpgt_epi32(b2, a2));
 
  358        EIGEN_STRONG_INLINE Packet8i pcmplt64<Packet8i>(
const Packet8i& a, 
const Packet8i& b)
 
  360#ifdef EIGEN_VECTORIZE_AVX2 
  361            return _mm256_cmpgt_epi64(b, a);
 
  363            Packet4i a1, a2, b1, b2;
 
  364            split_two(a, a1, a2);
 
  365            split_two(b, b1, b2);
 
  366            return combine_two((Packet4i)_mm_cmpgt_epi64(b1, a1), (Packet4i)_mm_cmpgt_epi64(b2, a2));
 
  371        EIGEN_STRONG_INLINE Packet8f pcmplt<Packet8f>(
const Packet8f& a, 
const Packet8f& b)
 
  373            return _mm256_cmp_ps(a, b, _CMP_LT_OQ);
 
  377        EIGEN_STRONG_INLINE Packet8f pcmple<Packet8f>(
const Packet8f& a, 
const Packet8f& b)
 
  379            return _mm256_cmp_ps(a, b, _CMP_LE_OQ);
 
  383        EIGEN_STRONG_INLINE Packet4d pcmplt<Packet4d>(
const Packet4d& a, 
const Packet4d& b)
 
  385            return _mm256_cmp_pd(a, b, _CMP_LT_OQ);
 
  389        EIGEN_STRONG_INLINE Packet4d pcmple<Packet4d>(
const Packet4d& a, 
const Packet4d& b)
 
  391            return _mm256_cmp_pd(a, b, _CMP_LE_OQ);
 
  395        EIGEN_STRONG_INLINE Packet8f pblendv(
const Packet8f& ifPacket, 
const Packet8f& thenPacket, 
const Packet8f& elsePacket)
 
  397            return _mm256_blendv_ps(elsePacket, thenPacket, ifPacket);
 
  401        EIGEN_STRONG_INLINE Packet8f pblendv(
const Packet8i& ifPacket, 
const Packet8f& thenPacket, 
const Packet8f& elsePacket)
 
  403            return pblendv(_mm256_castsi256_ps(ifPacket), thenPacket, elsePacket);
 
  407        EIGEN_STRONG_INLINE Packet8i pblendv(
const Packet8i& ifPacket, 
const Packet8i& thenPacket, 
const Packet8i& elsePacket)
 
  409            return _mm256_castps_si256(_mm256_blendv_ps(
 
  410                _mm256_castsi256_ps(elsePacket),
 
  411                _mm256_castsi256_ps(thenPacket),
 
  412                _mm256_castsi256_ps(ifPacket)
 
  417        EIGEN_STRONG_INLINE Packet4d pblendv(
const Packet4d& ifPacket, 
const Packet4d& thenPacket, 
const Packet4d& elsePacket)
 
  419            return _mm256_blendv_pd(elsePacket, thenPacket, ifPacket);
 
  423        EIGEN_STRONG_INLINE Packet4d pblendv(
const Packet8i& ifPacket, 
const Packet4d& thenPacket, 
const Packet4d& elsePacket)
 
  425            return pblendv(_mm256_castsi256_pd(ifPacket), thenPacket, elsePacket);
 
  429        EIGEN_STRONG_INLINE Packet8i pgather<Packet8i>(
const int* addr, 
const Packet8i& index)
 
  431#ifdef EIGEN_VECTORIZE_AVX2 
  432            return _mm256_i32gather_epi32(addr, index, 4);
 
  435            _mm256_storeu_si256((Packet8i*)u, index);
 
  436            return _mm256_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]],
 
  437                addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
 
  442        EIGEN_STRONG_INLINE Packet8f pgather<Packet8i>(
const float* addr, 
const Packet8i& index)
 
  444#ifdef EIGEN_VECTORIZE_AVX2 
  445            return _mm256_i32gather_ps(addr, index, 4);
 
  448            _mm256_storeu_si256((Packet8i*)u, index);
 
  449            return _mm256_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]],
 
  450                addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
 
  455        EIGEN_STRONG_INLINE Packet4d pgather<Packet8i>(
const double* addr, 
const Packet8i& index, 
bool upperhalf)
 
  457#ifdef EIGEN_VECTORIZE_AVX2 
  458            return _mm256_i32gather_pd(addr, _mm256_castsi256_si128(index), 8);
 
  461            _mm256_storeu_si256((Packet8i*)u, index);
 
  464                return _mm256_setr_pd(addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
 
  468                return _mm256_setr_pd(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
 
  474        EIGEN_STRONG_INLINE 
int pmovemask<Packet8f>(
const Packet8f& a)
 
  476            return _mm256_movemask_ps(a);
 
  480        EIGEN_STRONG_INLINE 
int pmovemask<Packet4d>(
const Packet4d& a)
 
  482            return _mm256_movemask_pd(a);
 
  486        EIGEN_STRONG_INLINE 
int pmovemask<Packet8i>(
const Packet8i& a)
 
  488            return pmovemask(_mm256_castsi256_ps(a));
 
  492        EIGEN_STRONG_INLINE Packet8f ptruncate<Packet8f>(
const Packet8f& a)
 
  494            return _mm256_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 
  498        EIGEN_STRONG_INLINE Packet4d ptruncate<Packet4d>(
const Packet4d& a)
 
  500            return _mm256_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 
  504        EIGEN_STRONG_INLINE Packet8i pcmpeq64<Packet8i>(
const Packet8i& a, 
const Packet8i& b)
 
  506#ifdef EIGEN_VECTORIZE_AVX2 
  507            return _mm256_cmpeq_epi64(a, b);
 
  509            Packet4i a1, a2, b1, b2;
 
  510            split_two(a, a1, a2);
 
  511            split_two(b, b1, b2);
 
  512            return combine_two((Packet4i)_mm_cmpeq_epi64(a1, b1), (Packet4i)_mm_cmpeq_epi64(a2, b2));
 
  517        EIGEN_STRONG_INLINE Packet8i pmuluadd64<Packet8i>(
const Packet8i& a, uint64_t b, uint64_t c)
 
  520            _mm256_storeu_si256((__m256i*)u, a);
 
  525            return _mm256_loadu_si256((__m256i*)u);
 
  528        EIGEN_STRONG_INLINE __m256d uint64_to_double(__m256i x) {
 
  529            auto y = _mm256_or_pd(_mm256_castsi256_pd(x), _mm256_set1_pd(0x0010000000000000));
 
  530            return _mm256_sub_pd(y, _mm256_set1_pd(0x0010000000000000));
 
  533        EIGEN_STRONG_INLINE __m256d int64_to_double(__m256i x) {
 
  534            x = padd64(x, _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000)));
 
  535            return _mm256_sub_pd(_mm256_castsi256_pd(x), _mm256_set1_pd(0x0018000000000000));
 
  538        EIGEN_STRONG_INLINE __m256i double_to_int64(__m256d x) {
 
  539            x = _mm256_add_pd(_mm256_floor_pd(x), _mm256_set1_pd(0x0018000000000000));
 
  541                _mm256_castpd_si256(x),
 
  542                _mm256_castpd_si256(_mm256_set1_pd(0x0018000000000000))
 
  547        EIGEN_STRONG_INLINE Packet8i pcast64<Packet4d, Packet8i>(
const Packet4d& a)
 
  549            return double_to_int64(a);
 
  553        EIGEN_STRONG_INLINE Packet4d pcast64<Packet8i, Packet4d>(
const Packet8i& a)
 
  555            return int64_to_double(a);
 
  558        template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 
  559            Packet4d psin<Packet4d>(
const Packet4d& x)
 
  564    #ifdef EIGENRAND_EIGEN_33_MODE 
  566        EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
 
  567            plog<Packet4d>(
const Packet4d& _x) {
 
  569            _EIGEN_DECLARE_CONST_Packet4d(1, 1.0);
 
  570            _EIGEN_DECLARE_CONST_Packet4d(half, 0.5);
 
  572            auto inv_mant_mask = _mm256_castsi256_pd(pseti64<Packet8i>(~0x7ff0000000000000));
 
  573            auto min_norm_pos = _mm256_castsi256_pd(pseti64<Packet8i>(0x10000000000000));
 
  574            auto minus_inf = _mm256_castsi256_pd(pseti64<Packet8i>(0xfff0000000000000));
 
  577            _EIGEN_DECLARE_CONST_Packet4d(cephes_SQRTHF, 0.707106781186547524);
 
  578            _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p0, 7.0376836292E-2);
 
  579            _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p1, -1.1514610310E-1);
 
  580            _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p2, 1.1676998740E-1);
 
  581            _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p3, -1.2420140846E-1);
 
  582            _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p4, +1.4249322787E-1);
 
  583            _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p5, -1.6668057665E-1);
 
  584            _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p6, +2.0000714765E-1);
 
  585            _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p7, -2.4999993993E-1);
 
  586            _EIGEN_DECLARE_CONST_Packet4d(cephes_log_p8, +3.3333331174E-1);
 
  587            _EIGEN_DECLARE_CONST_Packet4d(cephes_log_q1, -2.12194440e-4);
 
  588            _EIGEN_DECLARE_CONST_Packet4d(cephes_log_q2, 0.693359375);
 
  590            Packet4d invalid_mask = _mm256_cmp_pd(x, _mm256_setzero_pd(), _CMP_NGE_UQ); 
 
  591            Packet4d iszero_mask = _mm256_cmp_pd(x, _mm256_setzero_pd(), _CMP_EQ_OQ);
 
  594            x = pmax(x, min_norm_pos);
 
  596            Packet4d emm0 = uint64_to_double(psrl64<52>(_mm256_castpd_si256(x)));
 
  597            Packet4d e = psub(emm0, pset1<Packet4d>(1022));
 
  600            x = _mm256_and_pd(x, inv_mant_mask);
 
  601            x = _mm256_or_pd(x, p4d_half);
 
  610            Packet4d mask = _mm256_cmp_pd(x, p4d_cephes_SQRTHF, _CMP_LT_OQ);
 
  611            Packet4d tmp = _mm256_and_pd(x, mask);
 
  613            e = psub(e, _mm256_and_pd(p4d_1, mask));
 
  616            Packet4d x2 = pmul(x, x);
 
  617            Packet4d x3 = pmul(x2, x);
 
  622            y = pmadd(p4d_cephes_log_p0, x, p4d_cephes_log_p1);
 
  623            y1 = pmadd(p4d_cephes_log_p3, x, p4d_cephes_log_p4);
 
  624            y2 = pmadd(p4d_cephes_log_p6, x, p4d_cephes_log_p7);
 
  625            y = pmadd(y, x, p4d_cephes_log_p2);
 
  626            y1 = pmadd(y1, x, p4d_cephes_log_p5);
 
  627            y2 = pmadd(y2, x, p4d_cephes_log_p8);
 
  628            y = pmadd(y, x3, y1);
 
  629            y = pmadd(y, x3, y2);
 
  633            y1 = pmul(e, p4d_cephes_log_q1);
 
  634            tmp = pmul(x2, p4d_half);
 
  637            y2 = pmul(e, p4d_cephes_log_q2);
 
  642            return pblendv(iszero_mask, minus_inf, _mm256_or_pd(x, invalid_mask));
 
  646    #if !(EIGEN_VERSION_AT_LEAST(3,3,5)) 
  647        template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(
const Packet4i& a) {
 
  648            return _mm_cvtepi32_ps(a);
 
  651        template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(
const Packet4f& a) {
 
  652            return _mm_cvttps_epi32(a);