EigenRand  0.5.0
 
Loading...
Searching...
No Matches
arch/AVX512/PacketFilter.h
Go to the documentation of this file.
1
12#ifndef EIGENRAND_PACKET_FILTER_AVX512_H
13#define EIGENRAND_PACKET_FILTER_AVX512_H
14
15#include <immintrin.h>
16
17namespace Eigen
18{
19 namespace Rand
20 {
21 namespace detail
22 {
23 template<>
24 class CompressMask<64>
25 {
26 CompressMask() {}
27
28 public:
29 enum { full_size = 16 };
30 static const CompressMask& get_inst()
31 {
32 static CompressMask cm;
33 return cm;
34 }
35
36 template<typename Packet>
37 EIGEN_STRONG_INLINE int compress_append(Packet& _value, const Packet& _mask,
38 Packet& _rest, int rest_cnt, bool& full) const
39 {
40 auto& value = reinterpret_cast<internal::Packet16f&>(_value);
41 auto& mask = reinterpret_cast<const internal::Packet16f&>(_mask);
42 auto& rest = reinterpret_cast<internal::Packet16f&>(_rest);
43
44 const __mmask16 m = _mm512_movepi32_mask(_mm512_castps_si512(mask));
45
46 if (m == 0xFFFF)
47 {
48 full = true;
49 return rest_cnt;
50 }
51
52 const int cnt_m = _mm_popcnt_u32(m);
53
54 const __m512i counting = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
55 __m512i rotate = _mm512_sub_epi32(counting, _mm512_set1_epi32(cnt_m));
56 __m512 rot_rest = _mm512_permutexvar_ps(rotate, rest);
57
58 __m512 p1 = _mm512_mask_compress_ps(rot_rest, m, value);
59
60 auto new_cnt = rest_cnt + cnt_m;
61 if (new_cnt >= full_size)
62 {
63 rest = rot_rest;
64 value = p1;
65 full = true;
66 return new_cnt - full_size;
67 }
68 else
69 {
70 rest = p1;
71 full = false;
72 return new_cnt;
73 }
74 }
75 };
76 }
77 }
78}
79#endif