12#ifndef EIGENRAND_PACKET_FILTER_AVX512_H
13#define EIGENRAND_PACKET_FILTER_AVX512_H
// Specialization of CompressMask for the template argument 64.
// NOTE(review): 64 is presumably the packet size in bytes (a 512-bit register) - confirm
// against the primary template, which is not visible in this excerpt.
24 class CompressMask<64>
// Threshold that compress_append compares the accumulated element count against.
// NOTE(review): 16 presumably corresponds to the number of float lanes in a Packet16f - confirm.
29 enum { full_size = 16 };
// Accessor for a shared CompressMask instance.
30 static const CompressMask& get_inst()
// Function-local static instance; the return statement is not visible in this excerpt,
// presumably this object is what gets returned - confirm.
32 static CompressMask cm;
// Packs the lanes of `_value` selected by `_mask` together with lanes taken from `_rest`,
// and tracks how many selected elements have been accumulated so far.
//
// Parameters (as far as the visible lines show):
//   _value   - packet reinterpreted as Packet16f; its mask-selected lanes are compressed.
//   _mask    - packet reinterpreted as Packet16f; only the sign bit of each 32-bit lane is read.
//   _rest    - packet reinterpreted as Packet16f; supplies the lanes not filled from `_value`.
//   rest_cnt - count added to the number of selected lanes to form the new total.
//   full     - output flag. NOTE(review): presumably set when the total reaches full_size;
//              the assignment is not visible in this excerpt - confirm.
// Returns: on the visible return path, the new total minus full_size.
// NOTE(review): writes back to `_value` / `_rest` and any other return path are not
// visible in this excerpt - confirm against the full source.
36 template<
typename Packet>
37 EIGEN_STRONG_INLINE
int compress_append(Packet& _value,
const Packet& _mask,
38 Packet& _rest,
int rest_cnt,
bool& full)
const
// View the generic Packet arguments as 16-lane float packets.
40 auto& value =
reinterpret_cast<internal::Packet16f&
>(_value);
41 auto& mask =
reinterpret_cast<const internal::Packet16f&
>(_mask);
42 auto& rest =
reinterpret_cast<internal::Packet16f&
>(_rest);
// Build a 16-bit lane mask from the most significant bit of each 32-bit lane of `mask`.
44 const __mmask16 m = _mm512_movepi32_mask(_mm512_castps_si512(mask));
// Number of selected lanes (set bits in m).
52 const int cnt_m = _mm_popcnt_u32(m);
// Per-lane permutation indices: lane index minus cnt_m.
54 const __m512i counting = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
55 __m512i rotate = _mm512_sub_epi32(counting, _mm512_set1_epi32(cnt_m));
// Permute `rest` by those indices. The permute consumes only the low 4 bits of each index,
// so negative values wrap and lane i receives rest[(i - cnt_m) mod 16].
56 __m512 rot_rest = _mm512_permutexvar_ps(rotate, rest);
// Pack the selected lanes of `value` contiguously into the low lanes; the remaining
// upper lanes are taken from rot_rest.
58 __m512 p1 = _mm512_mask_compress_ps(rot_rest, m, value);
// Total accumulated count after appending the newly selected lanes.
60 auto new_cnt = rest_cnt + cnt_m;
// NOTE(review): the statements guarded by this condition (original lines between 61 and 66)
// are not visible in this excerpt, so it is unclear whether the return below is inside it.
61 if (new_cnt >= full_size)
66 return new_cnt - full_size;