12#ifndef EIGENRAND_PACKET_FILTER_NEON_H
13#define EIGENRAND_PACKET_FILTER_NEON_H
// Specialization of CompressMask for the template argument 16
// (presumably the packet width in bytes — TODO confirm against the primary template).
// It holds three lookup tables that compress_append indexes with a 4-bit move-mask.
24 class CompressMask<16>
// idx[slot][m]: one byte of packed 2-bit lane indices (built by make_compress)
// for each of 7 table slots and each of the 16 possible mask values m.
26 std::array<std::array<uint8_t, 16>, 7> idx;
// selector[k]: packet handed to internal::pblendv as the selection operand;
// compress_append indexes it with rest_cnt, so k ranges over 0..3.
27 std::array<internal::Packet4f, 4> selector;
// cnt[m]: per-mask count that compress_append adds to rest_cnt and compares with
// full_size (presumably the number of set bits in m; the assignment is not in this excerpt).
28 std::array<uint8_t, 16> cnt;
// Builds one byte of packed 2-bit lane indices for a given mask.
//   mask   - lane mask; the loop visits four lane positions (i = 0..3).
//   offset - defaults to 0; the constructor also passes negative values (o - 7).
// Returns a uint8_t in which output slot n occupies bits [2n, 2n+1].
// NOTE(review): the declarations and updates of `n` and `ret` are outside this
// excerpt; presumably n starts at `offset` and advances once per selected lane — confirm.
30 static uint8_t make_compress(
int mask,
int offset = 0)
34 for (
int i = 0; i < 4; ++i)
// Only non-negative slots are written, so a negative starting slot skips the
// first selected lanes; lane index i is stored in slot n as two bits.
40 if (n >= 0) ret |= (i & 3) << (2 * n);
// Computes a uint8_t from `mask` using a four-iteration loop.
// NOTE(review): the loop body is not visible in this excerpt; presumably it counts
// the set bits among the low four bits of mask — confirm.
47 static uint8_t count(
int mask)
50 for (
int i = 0; i < 4; ++i)
// Table initialization (the enclosing function signature is not visible in this
// excerpt; presumably this is the constructor body — confirm).
// Outer loop: every 4-bit mask value i. Inner loop: every table slot o.
60 for (
int i = 0; i < 16; ++i)
62 for (
int o = 0; o < 7; ++o)
// Slots 0..3 use offset o directly; slots 4..6 use o - 7, i.e. offsets -3, -2, -1.
64 idx[o][i] = make_compress(i, o < 4 ? o : o - 7);
// Scratch array, zero-initialized; it is loaded into each selector entry below.
70 uint32_t v[4] = { 0, };
// Each selector entry is the 4 x uint32 contents of v loaded and reinterpreted
// as a float packet (bit pattern preserved by vreinterpretq_f32_u32).
// NOTE(review): in this excerpt v is not modified between the four loads, which
// would make all selectors identical; the lines that update v appear to be
// elided — confirm against the full file.
72 selector[0] = (internal::Packet4f)vreinterpretq_f32_u32(vld1q_u32(v));
74 selector[1] = (internal::Packet4f)vreinterpretq_f32_u32(vld1q_u32(v));
76 selector[2] = (internal::Packet4f)vreinterpretq_f32_u32(vld1q_u32(v));
78 selector[3] = (internal::Packet4f)vreinterpretq_f32_u32(vld1q_u32(v));
// Rearranges elements according to the packed 2-bit indices in `i`.
//   p - source packet.
//   i - four 2-bit source indices; bits [2k, 2k+1] select the source for output element k.
// NOTE(review): the declarations of `u` and `t`, the assignment of t[0] and the
// return are not visible here; presumably u holds the lanes of p and t is
// reloaded as the result — confirm.
81 static EIGEN_STRONG_INLINE internal::Packet4f permute(
const internal::Packet4f& p, uint8_t i)
// Output elements 1..3 take the source element named by bits 2-3, 4-5 and 6-7 of i.
87 t[1] = u[(i >> 2) & 3];
88 t[2] = u[(i >> 4) & 3];
89 t[3] = u[(i >> 6) & 3];
// Threshold used by compress_append: a mask whose cnt equals full_size takes the
// early branch, and new_cnt is compared against it to detect a filled packet.
95 enum { full_size = 4 };
// Accessor for a shared CompressMask, typed as a const reference.
// NOTE(review): the return statement is not visible here; presumably it returns cm.
97 static const CompressMask& get_inst()
// Function-local static: the instance (and its tables) is constructed the first
// time control passes through this declaration.
99 static CompressMask cm;
// Combines the mask-selected elements of `_value` with previously kept elements
// in `_rest`, using the precomputed idx / selector / cnt tables.
//   _value   - input packet; reinterpreted as internal::Packet4f.
//   _mask    - per-lane mask; converted to an integer with internal::pmovemask.
//   _rest    - carry-over packet; read by the blend and overwritten on overflow.
//   rest_cnt - count already held in _rest; it indexes selector (4 entries) and
//              idx directly, so it must lie in 0..3.
//   full     - output flag. NOTE(review): its assignments are not visible in
//              this excerpt — confirm semantics against the full file.
// Returns (on the visible path) new_cnt - full_size, the count left over after
// one full packet has been filled.
103 template<
typename Packet>
104 EIGEN_STRONG_INLINE
int compress_append(Packet& _value,
const Packet& _mask,
105 Packet& _rest,
int rest_cnt,
bool& full)
const
// View the generic Packet arguments as 4 x float packets without copying.
107 auto& value =
reinterpret_cast<internal::Packet4f&
>(_value);
108 auto& mask =
reinterpret_cast<const internal::Packet4f&
>(_mask);
109 auto& rest =
reinterpret_cast<internal::Packet4f&
>(_rest);
// m is used as the index into the 16-entry cnt table and the idx tables.
111 int m = internal::pmovemask(mask);
// Special case when cnt[m] equals full_size (branch body not visible in this excerpt).
112 if (cnt[m] == full_size)
// Permute value with the table for the current rest_cnt, then blend with rest
// under selector[rest_cnt].
117 auto p1 = permute(value, idx[rest_cnt][m]);
118 p1 = internal::pblendv(selector[rest_cnt], rest, p1);
// Total count after appending this packet's contribution.
120 auto new_cnt = rest_cnt + cnt[m];
121 if (new_cnt >= full_size)
// Overflow beyond one full packet: rebuild rest from value.
123 if (new_cnt > full_size)
// new_cnt - cnt[m] equals rest_cnt, so the slot is rest_cnt + full_size - 1,
// i.e. one of the slots 4..6 that the constructor fills with negative offsets.
125 rest = permute(value, idx[new_cnt - cnt[m] + full_size - 1][m]);
129 return new_cnt - full_size;