18 #include <riscv_vector.h>
32 using DFromV =
typename DFromV_t<RemoveConst<V>>::type;
35 using TFromV = TFromD<DFromV<V>>;
37 template <
typename T,
size_t N>
40 return sizeof(T) * 8 / (N /
HWY_LANES(T));
44 template <
typename T,
int kShift = 0>
45 using Full = Simd<T, (kShift < 0) ? (
HWY_LANES(T) >> (-kShift))
// Iteration table for mask (vbool) types: expands X_MACRO once per row.
// Each row passes five arguments; the macros in this file that take five
// parameters name them (SEW, SHIFT, MLEN, NAME, OP).
// Rows, as (SEW, SHIFT, MLEN): (64,0,64) (32,0,32) (16,0,16) (8,0,8) (8,1,4)
// (8,2,2) (8,3,1). In every row MLEN equals SEW >> SHIFT, so the seven rows
// cover the mask lengths 64, 32, 16, 8, 4, 2 and 1 exactly once.
// NAME and OP are forwarded to X_MACRO unchanged.
57 #define HWY_RVV_FOREACH_B(X_MACRO, NAME, OP) \
58 X_MACRO(64, 0, 64, NAME, OP) \
59 X_MACRO(32, 0, 32, NAME, OP) \
60 X_MACRO(16, 0, 16, NAME, OP) \
61 X_MACRO(8, 0, 8, NAME, OP) \
62 X_MACRO(8, 1, 4, NAME, OP) \
63 X_MACRO(8, 2, 2, NAME, OP) \
64 X_MACRO(8, 3, 1, NAME, OP)
// Iteration table for 8-bit lanes: expands X_MACRO once per LMUL (m1..m8).
// Arguments per row, matching the ten-parameter macros in this file:
//   BASE, CHAR - forwarded type stem and one-letter tag (e.g. uint/u)
//   SEW        - lane width in bits (8 here)
//   LMUL       - m1, m2, m4, m8
//   X2         - the next larger LMUL; `__` is a placeholder for m8, which
//                has no larger group
//   HALF       - the next smaller LMUL (mf2 for m1)
//   SHIFT      - 0..3, i.e. log2 of the LMUL multiplier
//   MLEN       - mask length for this row (8, 4, 2, 1 = SEW >> SHIFT)
//   NAME, OP   - forwarded unchanged
70 #define HWY_RVV_FOREACH_08(X_MACRO, BASE, CHAR, NAME, OP) \
71 X_MACRO(BASE, CHAR, 8, m1, m2, mf2, 0, 8, NAME, OP) \
72 X_MACRO(BASE, CHAR, 8, m2, m4, m1, 1, 4, NAME, OP) \
73 X_MACRO(BASE, CHAR, 8, m4, m8, m2, 2, 2, NAME, OP) \
74 X_MACRO(BASE, CHAR, 8, m8, __, m4, 3, 1, NAME, OP)
// Iteration table for 16-bit lanes: expands X_MACRO once per LMUL (m1..m8).
// Same column layout as the 8-bit table: BASE, CHAR, SEW, LMUL, X2 (next
// larger LMUL, `__` for m8), HALF (next smaller LMUL), SHIFT, MLEN, NAME, OP.
// MLEN is 16, 8, 4, 2 for SHIFT 0..3 (SEW >> SHIFT).
76 #define HWY_RVV_FOREACH_16(X_MACRO, BASE, CHAR, NAME, OP) \
77 X_MACRO(BASE, CHAR, 16, m1, m2, mf2, 0, 16, NAME, OP) \
78 X_MACRO(BASE, CHAR, 16, m2, m4, m1, 1, 8, NAME, OP) \
79 X_MACRO(BASE, CHAR, 16, m4, m8, m2, 2, 4, NAME, OP) \
80 X_MACRO(BASE, CHAR, 16, m8, __, m4, 3, 2, NAME, OP)
// Iteration table for 32-bit lanes: expands X_MACRO once per LMUL (m1..m8).
// Same column layout as the 8-bit table: BASE, CHAR, SEW, LMUL, X2 (next
// larger LMUL, `__` for m8), HALF (next smaller LMUL), SHIFT, MLEN, NAME, OP.
// MLEN is 32, 16, 8, 4 for SHIFT 0..3 (SEW >> SHIFT).
82 #define HWY_RVV_FOREACH_32(X_MACRO, BASE, CHAR, NAME, OP) \
83 X_MACRO(BASE, CHAR, 32, m1, m2, mf2, 0, 32, NAME, OP) \
84 X_MACRO(BASE, CHAR, 32, m2, m4, m1, 1, 16, NAME, OP) \
85 X_MACRO(BASE, CHAR, 32, m4, m8, m2, 2, 8, NAME, OP) \
86 X_MACRO(BASE, CHAR, 32, m8, __, m4, 3, 4, NAME, OP)
// Iteration table for 64-bit lanes: expands X_MACRO once per LMUL (m1..m8).
// Same column layout as the 8-bit table: BASE, CHAR, SEW, LMUL, X2 (next
// larger LMUL, `__` for m8), HALF (next smaller LMUL), SHIFT, MLEN, NAME, OP.
// MLEN is 64, 32, 16, 8 for SHIFT 0..3 (SEW >> SHIFT).
88 #define HWY_RVV_FOREACH_64(X_MACRO, BASE, CHAR, NAME, OP) \
89 X_MACRO(BASE, CHAR, 64, m1, m2, mf2, 0, 64, NAME, OP) \
90 X_MACRO(BASE, CHAR, 64, m2, m4, m1, 1, 32, NAME, OP) \
91 X_MACRO(BASE, CHAR, 64, m4, m8, m2, 2, 16, NAME, OP) \
92 X_MACRO(BASE, CHAR, 64, m8, __, m4, 3, 8, NAME, OP)
// Unsigned-integer wrappers: each forwards to the per-width table with
// BASE=uint and CHAR=u, so X_MACRO sees e.g. (uint, u, 8, m1, ...).
95 #define HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP) \
96 HWY_RVV_FOREACH_08(X_MACRO, uint, u, NAME, OP)
97 #define HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP) \
98 HWY_RVV_FOREACH_16(X_MACRO, uint, u, NAME, OP)
99 #define HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP) \
100 HWY_RVV_FOREACH_32(X_MACRO, uint, u, NAME, OP)
101 #define HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP) \
102 HWY_RVV_FOREACH_64(X_MACRO, uint, u, NAME, OP)
// Signed-integer wrappers: each forwards to the per-width table with
// BASE=int and CHAR=i.
105 #define HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP) \
106 HWY_RVV_FOREACH_08(X_MACRO, int, i, NAME, OP)
107 #define HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP) \
108 HWY_RVV_FOREACH_16(X_MACRO, int, i, NAME, OP)
109 #define HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP) \
110 HWY_RVV_FOREACH_32(X_MACRO, int, i, NAME, OP)
111 #define HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP) \
112 HWY_RVV_FOREACH_64(X_MACRO, int, i, NAME, OP)
// Floating-point wrappers: each forwards to the per-width table with
// BASE=float and CHAR=f. There is no 8-bit float wrapper.
115 #define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP) \
116 HWY_RVV_FOREACH_16(X_MACRO, float, f, NAME, OP)
117 #define HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP) \
118 HWY_RVV_FOREACH_32(X_MACRO, float, f, NAME, OP)
119 #define HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP) \
120 HWY_RVV_FOREACH_64(X_MACRO, float, f, NAME, OP)
// Applies X_MACRO to every unsigned lane width (8, 16, 32, 64 bits), and
// through the per-width tables to every LMUL of each.
123 #define HWY_RVV_FOREACH_U(X_MACRO, NAME, OP) \
124 HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP) \
125 HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP) \
126 HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP) \
127 HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP)
// Applies X_MACRO to every signed lane width (8, 16, 32, 64 bits), and
// through the per-width tables to every LMUL of each.
129 #define HWY_RVV_FOREACH_I(X_MACRO, NAME, OP) \
130 HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP) \
131 HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP) \
132 HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP) \
133 HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP)
// Applies X_MACRO to the floating-point lane widths. Two alternative
// definitions follow: the first includes 16-bit floats, the second covers
// only 32- and 64-bit floats.
// NOTE(review): as shown, the same function-like macro is defined twice with
// different replacement lists and no #undef in between, which is not a valid
// redefinition. Presumably a preprocessor conditional selects exactly one of
// the two and its directives are missing from this view - confirm before
// touching either definition.
136 #define HWY_RVV_FOREACH_F(X_MACRO, NAME, OP) \
137 HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP) \
138 HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP) \
139 HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP)
// Variant without 16-bit floats.
141 #define HWY_RVV_FOREACH_F(X_MACRO, NAME, OP) \
142 HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP) \
143 HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP)
// Per-width integer groupings: each applies X_MACRO to the unsigned table and
// then the signed table of one lane width (16, 32 or 64 bits). No 8-bit
// grouping is defined in this run of macros.
147 #define HWY_RVV_FOREACH_UI16(X_MACRO, NAME, OP) \
148 HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP) \
149 HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP)
151 #define HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP) \
152 HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP) \
153 HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP)
155 #define HWY_RVV_FOREACH_UI64(X_MACRO, NAME, OP) \
156 HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP) \
157 HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP)
// Applies X_MACRO to all integer types: every unsigned width first, then
// every signed width.
160 #define HWY_RVV_FOREACH_UI(X_MACRO, NAME, OP) \
161 HWY_RVV_FOREACH_U(X_MACRO, NAME, OP) \
162 HWY_RVV_FOREACH_I(X_MACRO, NAME, OP)
// Applies X_MACRO to every lane type covered by the tables: unsigned, signed,
// then floating-point (whichever widths HWY_RVV_FOREACH_F currently covers).
164 #define HWY_RVV_FOREACH(X_MACRO, NAME, OP) \
165 HWY_RVV_FOREACH_U(X_MACRO, NAME, OP) \
166 HWY_RVV_FOREACH_I(X_MACRO, NAME, OP) \
167 HWY_RVV_FOREACH_F(X_MACRO, NAME, OP)
// Token-pasting helpers that build type names from the table columns.
// Lane type: BASE + SEW + "_t", e.g. (uint, 8) -> uint8_t.
170 #define HWY_RVV_T(BASE, SEW) BASE##SEW##_t
// Descriptor alias name: "D" + CHAR + SEW + LMUL, e.g. (u, 16, m1) -> Du16m1.
171 #define HWY_RVV_D(CHAR, SEW, LMUL) D##CHAR##SEW##LMUL
// Vector type: "v" + BASE + SEW + LMUL + "_t", e.g. (uint, 8, m1) -> vuint8m1_t.
172 #define HWY_RVV_V(BASE, SEW, LMUL) v##BASE##SEW##LMUL##_t
// Mask type: "vbool" + MLEN + "_t", e.g. (8) -> vbool8_t.
173 #define HWY_RVV_M(MLEN) vbool##MLEN##_t
181 #define HWY_SPECIALIZE(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
182 using HWY_RVV_D(CHAR, SEW, LMUL) = Full<HWY_RVV_T(BASE, SEW), SHIFT>; \
183 using V##CHAR##SEW##LMUL = HWY_RVV_V(BASE, SEW, LMUL); \
185 struct DFromV_t<HWY_RVV_V(BASE, SEW, LMUL)> { \
186 using Lane = HWY_RVV_T(BASE, SEW); \
187 using type = Full<Lane, SHIFT>; \
// Hand-written float16 aliases following the V<CHAR><SEW><LMUL> /
// D<CHAR><SEW><LMUL> naming used by the specialization macro above.
// The Full<> shift argument is 0..3 for m1..m8, the same LMUL-to-SHIFT
// mapping as the iteration tables.
// NOTE(review): any preprocessor guard around these aliases is not visible in
// this view - confirm under which configuration they are compiled.
190 using Vf16m1 = vfloat16m1_t;
191 using Vf16m2 = vfloat16m2_t;
192 using Vf16m4 = vfloat16m4_t;
193 using Vf16m8 = vfloat16m8_t;
194 using Df16m1 = Full<float16_t, 0>;
195 using Df16m2 = Full<float16_t, 1>;
196 using Df16m4 = Full<float16_t, 2>;
197 using Df16m8 = Full<float16_t, 3>;
201 #undef HWY_SPECIALIZE
207 #define HWY_RVV_LANES(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
208 HWY_API size_t NAME(HWY_RVV_D(CHAR, SEW, LMUL) ) { \
209 return v##OP##SEW##LMUL(); \
// Expands to Lanes() called on a default-constructed Full<> descriptor for
// the given SEW and SHIFT. The lane type is always the unsigned integer of
// that width (HWY_RVV_T(uint, SEW)), regardless of the caller's BASE.
// The wrapper macros in this file pass the result as the trailing length
// argument of the intrinsic they call.
223 #define HWY_RVV_AVL(SEW, SHIFT) Lanes(Full<HWY_RVV_T(uint, SEW), SHIFT>())
226 #define HWY_RVV_RETV_ARGV(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
228 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
229 return v##OP##_v_##CHAR##SEW##LMUL(v, HWY_RVV_AVL(SEW, SHIFT)); \
233 #define HWY_RVV_RETV_ARGVS(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
235 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
236 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_T(BASE, SEW) b) { \
237 return v##OP##_##CHAR##SEW##LMUL(a, b, HWY_RVV_AVL(SEW, SHIFT)); \
241 #define HWY_RVV_RETV_ARGVV(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
243 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
244 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) { \
245 return v##OP##_vv_##CHAR##SEW##LMUL(a, b, HWY_RVV_AVL(SEW, SHIFT)); \
252 #define HWY_RVV_SET(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
253 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
254 NAME(HWY_RVV_D(CHAR, SEW, LMUL) d, HWY_RVV_T(BASE, SEW) arg) { \
255 return v##OP##_##CHAR##SEW##LMUL(arg, Lanes(d)); \
271 using VFromD = decltype(
Set(D(), TFromD<D>()));
274 template <
typename T,
size_t N, HWY_IF_LE128(T, N)>
281 template <
typename T,
size_t N>
292 #define HWY_RVV_UNDEFINED(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
294 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
295 NAME(HWY_RVV_D(CHAR, SEW, LMUL) ) { \
296 return v##OP##_##CHAR##SEW##LMUL(); \
300 #undef HWY_RVV_UNDEFINED
313 #define HWY_RVV_CAST_U8(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
315 HWY_API vuint8##LMUL##_t BitCastToByte(vuint8##LMUL##_t v) { return v; } \
316 HWY_API vuint8##LMUL##_t BitCastFromByte(HWY_RVV_D(CHAR, SEW, LMUL) , \
317 vuint8##LMUL##_t v) { \
322 #define HWY_RVV_CAST_I8(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
324 HWY_API vuint8##LMUL##_t BitCastToByte(vint8##LMUL##_t v) { \
325 return vreinterpret_v_i8##LMUL##_u8##LMUL(v); \
327 HWY_API vint8##LMUL##_t BitCastFromByte(HWY_RVV_D(CHAR, SEW, LMUL) , \
328 vuint8##LMUL##_t v) { \
329 return vreinterpret_v_u8##LMUL##_i8##LMUL(v); \
334 #define HWY_RVV_CAST_U(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
335 HWY_API vuint8##LMUL##_t BitCastToByte(HWY_RVV_V(BASE, SEW, LMUL) v) { \
336 return v##OP##_v_##CHAR##SEW##LMUL##_u8##LMUL(v); \
338 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
339 HWY_RVV_D(CHAR, SEW, LMUL) , vuint8##LMUL##_t v) { \
340 return v##OP##_v_u8##LMUL##_##CHAR##SEW##LMUL(v); \
344 #define HWY_RVV_CAST_IF(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
346 HWY_API vuint8##LMUL##_t BitCastToByte(HWY_RVV_V(BASE, SEW, LMUL) v) { \
347 return v##OP##_v_u##SEW##LMUL##_u8##LMUL( \
348 v##OP##_v_##CHAR##SEW##LMUL##_u##SEW##LMUL(v)); \
350 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
351 HWY_RVV_D(CHAR, SEW, LMUL) , vuint8##LMUL##_t v) { \
352 return v##OP##_v_u##SEW##LMUL##_##CHAR##SEW##LMUL( \
353 v##OP##_v_u8##LMUL##_u##SEW##LMUL(v)); \
366 #undef HWY_RVV_CAST_U8
367 #undef HWY_RVV_CAST_I8
368 #undef HWY_RVV_CAST_U
369 #undef HWY_RVV_CAST_IF
379 template <
class D,
class FromV>
385 template <
typename T,
size_t N,
class FromV, HWY_IF_LE128(T, N)>
392 template <
class V,
class DU = RebindToUn
signed<DFromV<V>>>
403 #define HWY_RVV_IOTA(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
404 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_D(CHAR, SEW, LMUL) d) { \
405 return v##OP##_##CHAR##SEW##LMUL(Lanes(d)); \
411 template <
class D,
class DU = RebindToUn
signed<D>>
417 template <
typename T,
size_t N, HWY_IF_LE128(T, N)>
430 template <
class V, HWY_IF_FLOAT_V(V)>
446 template <
class V, HWY_IF_FLOAT_V(V)>
448 using DF = DFromV<V>;
449 using DU = RebindToUnsigned<DF>;
455 #undef HWY_RVV_OR_MASK
459 template <
class V, HWY_IF_FLOAT_V(V)>
461 using DF = DFromV<V>;
462 using DU = RebindToUnsigned<DF>;
475 template <
class V, HWY_IF_FLOAT_V(V)>
477 using DF = DFromV<V>;
478 using DU = RebindToUnsigned<DF>;
486 return And(
Not(not_a), b);
540 #define HWY_RVV_SHIFT(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
541 template <int kBits> \
542 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
543 return v##OP##_vx_##CHAR##SEW##LMUL(v, kBits, HWY_RVV_AVL(SEW, SHIFT)); \
545 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
546 NAME##Same(HWY_RVV_V(BASE, SEW, LMUL) v, int bits) { \
547 return v##OP##_vx_##CHAR##SEW##LMUL(v, static_cast<uint8_t>(bits), \
548 HWY_RVV_AVL(SEW, SHIFT)); \
561 #define HWY_RVV_SHIFT_VV(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
563 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
564 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, LMUL) bits) { \
565 return v##OP##_vv_##CHAR##SEW##LMUL(v, bits, HWY_RVV_AVL(SEW, SHIFT)); \
570 #define HWY_RVV_SHIFT_II(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
572 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
573 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, LMUL) bits) { \
574 return v##OP##_vv_##CHAR##SEW##LMUL(v, detail::BitCastToUnsigned(bits), \
575 HWY_RVV_AVL(SEW, SHIFT)); \
585 #undef HWY_RVV_SHIFT_II
586 #undef HWY_RVV_SHIFT_VV
647 #define HWY_RVV_FMA(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
648 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
649 NAME(HWY_RVV_V(BASE, SEW, LMUL) mul, HWY_RVV_V(BASE, SEW, LMUL) x, \
650 HWY_RVV_V(BASE, SEW, LMUL) add) { \
651 return v##OP##_vv_##CHAR##SEW##LMUL(add, mul, x, HWY_RVV_AVL(SEW, SHIFT)); \
674 #define HWY_RVV_RETM_ARGVV(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
676 HWY_API HWY_RVV_M(MLEN) \
677 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) { \
678 return v##OP##_vv_##CHAR##SEW##LMUL##_b##MLEN(a, b, \
679 HWY_RVV_AVL(SEW, SHIFT)); \
683 #define HWY_RVV_RETM_ARGVS(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
685 HWY_API HWY_RVV_M(MLEN) \
686 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_T(BASE, SEW) b) { \
687 return v##OP##_vx_##CHAR##SEW##LMUL##_b##MLEN(a, b, \
688 HWY_RVV_AVL(SEW, SHIFT)); \
710 #undef HWY_RVV_RETM_ARGVV
711 #undef HWY_RVV_RETM_ARGVS
716 HWY_API auto Ge(
const V a,
const V b) -> decltype(
Le(a, b)) {
721 HWY_API auto Gt(
const V a,
const V b) -> decltype(
Lt(a, b)) {
734 #define HWY_RVV_RETM_ARGM(SEW, SHIFT, MLEN, NAME, OP) \
735 HWY_API HWY_RVV_M(MLEN) NAME(HWY_RVV_M(MLEN) m) { \
736 return vm##OP##_m_b##MLEN(m, ~0ull); \
741 #undef HWY_RVV_RETM_ARGM
746 #define HWY_RVV_RETM_ARGMM(SEW, SHIFT, MLEN, NAME, OP) \
747 HWY_API HWY_RVV_M(MLEN) NAME(HWY_RVV_M(MLEN) a, HWY_RVV_M(MLEN) b) { \
748 return vm##OP##_mm_b##MLEN(b, a, HWY_RVV_AVL(SEW, SHIFT)); \
762 #undef HWY_RVV_RETM_ARGMM
765 #define HWY_RVV_IF_THEN_ELSE(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, \
767 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
768 NAME(HWY_RVV_M(MLEN) m, HWY_RVV_V(BASE, SEW, LMUL) yes, \
769 HWY_RVV_V(BASE, SEW, LMUL) no) { \
770 return v##OP##_vvm_##CHAR##SEW##LMUL(m, no, yes, HWY_RVV_AVL(SEW, SHIFT)); \
775 #undef HWY_RVV_IF_THEN_ELSE
778 template <
class M,
class V>
784 template <
class M,
class V>
799 template <
class D,
typename MFrom>
809 #define HWY_RVV_VEC_FROM_MASK(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, \
811 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
812 NAME(HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_M(MLEN) m) { \
813 return v##OP##_##CHAR##SEW##LMUL##_m(m, v0, v0, 1, \
814 HWY_RVV_AVL(SEW, SHIFT)); \
818 #undef HWY_RVV_VEC_FROM_MASK
821 template <
class D, HWY_IF_NOT_FLOAT_D(D)>
823 return detail::SubS(
Zero(d), mask);
826 template <
class D, HWY_IF_FLOAT_D(D)>
834 const auto v0 =
Zero(DFromV<V>());
842 return ShiftRight<
sizeof(TFromV<V>) * 8 - 1>(v);
847 #define HWY_RVV_FIND_FIRST_TRUE(SEW, SHIFT, MLEN, NAME, OP) \
849 HWY_API intptr_t FindFirstTrue(D d, HWY_RVV_M(MLEN) m) { \
850 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
851 return vfirst_m_b##MLEN(m, Lanes(d)); \
855 #undef HWY_RVV_FIND_FIRST_TRUE
865 #define HWY_RVV_ALL_TRUE(SEW, SHIFT, MLEN, NAME, OP) \
867 HWY_API bool AllTrue(D d, HWY_RVV_M(MLEN) m) { \
868 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
869 return AllFalse(d, vmnot_m_b##MLEN(m, Lanes(d))); \
873 #undef HWY_RVV_ALL_TRUE
877 #define HWY_RVV_COUNT_TRUE(SEW, SHIFT, MLEN, NAME, OP) \
879 HWY_API size_t CountTrue(D d, HWY_RVV_M(MLEN) m) { \
880 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
881 return vpopc_m_b##MLEN(m, Lanes(d)); \
885 #undef HWY_RVV_COUNT_TRUE
891 #define HWY_RVV_LOAD(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
892 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
893 NAME(HWY_RVV_D(CHAR, SEW, LMUL) d, \
894 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
895 return v##OP##SEW##_v_##CHAR##SEW##LMUL(p, Lanes(d)); \
901 template <
typename T,
size_t N, HWY_IF_LE128(T, N)>
931 #define HWY_RVV_MASKED_LOAD(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, \
933 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
934 NAME(HWY_RVV_M(MLEN) m, HWY_RVV_D(CHAR, SEW, LMUL) d, \
935 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
936 return v##OP##SEW##_v_##CHAR##SEW##LMUL##_m(m, Zero(d), p, Lanes(d)); \
939 #undef HWY_RVV_MASKED_LOAD
943 #define HWY_RVV_RET_ARGVDP(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
945 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, \
946 HWY_RVV_D(CHAR, SEW, LMUL) d, \
947 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
948 return v##OP##SEW##_v_##CHAR##SEW##LMUL(p, v, Lanes(d)); \
951 #undef HWY_RVV_RET_ARGVDP
954 template <
typename T,
size_t N, HWY_IF_LE128(T, N)>
963 template <
class V,
class D>
969 template <
class V,
class D,
typename T>
971 Store(v, d, aligned);
976 #define HWY_RVV_SCATTER(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
978 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, \
979 HWY_RVV_D(CHAR, SEW, LMUL) d, \
980 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base, \
981 HWY_RVV_V(int, SEW, LMUL) offset) { \
982 return v##OP##ei##SEW##_v_##CHAR##SEW##LMUL( \
983 base, detail::BitCastToUnsigned(offset), v, Lanes(d)); \
986 #undef HWY_RVV_SCATTER
989 template <
typename T,
size_t N, HWY_IF_LE128(T, N)>
998 template <
class D, HWY_IF_LANE_SIZE_D(D, 4)>
1004 template <
class D, HWY_IF_LANE_SIZE_D(D, 8)>
1006 const VFromD<RebindToSigned<D>> index) {
1012 #define HWY_RVV_GATHER(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
1013 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1014 NAME(HWY_RVV_D(CHAR, SEW, LMUL) d, \
1015 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base, \
1016 HWY_RVV_V(int, SEW, LMUL) offset) { \
1017 return v##OP##ei##SEW##_v_##CHAR##SEW##LMUL( \
1018 base, detail::BitCastToUnsigned(offset), Lanes(d)); \
1021 #undef HWY_RVV_GATHER
1024 template <
typename T,
size_t N, HWY_IF_LE128(T, N)>
1033 template <
class D, HWY_IF_LANE_SIZE_D(D, 4)>
1039 template <
class D, HWY_IF_LANE_SIZE_D(D, 8)>
1041 const VFromD<RebindToSigned<D>> index) {
1046 #if HWY_COMPILER_GCC && !HWY_COMPILER_CLANG
1050 #define HWY_RVV_STORE3(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
1051 HWY_API void NAME( \
1052 HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1, \
1053 HWY_RVV_V(BASE, SEW, LMUL) v2, HWY_RVV_D(CHAR, SEW, LMUL) d, \
1054 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned) { \
1055 const v##BASE##SEW##LMUL##x3_t triple = \
1056 vcreate_##CHAR##SEW##LMUL##x3(v0, v1, v2); \
1057 return v##OP##e8_v_##CHAR##SEW##LMUL##x3(unaligned, triple, Lanes(d)); \
1063 #undef HWY_RVV_STORE3
1066 template <
typename T,
size_t N, HWY_IF_LE128(T, N)>
1068 VFromD<Simd<T, N>> v2, Simd<T, N> ,
1075 #define HWY_RVV_STORE4(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
1076 HWY_API void NAME( \
1077 HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1, \
1078 HWY_RVV_V(BASE, SEW, LMUL) v2, HWY_RVV_V(BASE, SEW, LMUL) v3, \
1079 HWY_RVV_D(CHAR, SEW, LMUL) d, \
1080 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT aligned) { \
1081 const v##BASE##SEW##LMUL##x4_t quad = \
1082 vcreate_##CHAR##SEW##LMUL##x4(v0, v1, v2, v3); \
1083 return v##OP##e8_v_##CHAR##SEW##LMUL##x4(aligned, quad, Lanes(d)); \
1089 #undef HWY_RVV_STORE4
1092 template <
typename T,
size_t N, HWY_IF_LE128(T, N)>
1095 Simd<T, N> , T* unaligned) {
1103 #define HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, LMUL, LMUL_IN) \
1104 HWY_API HWY_RVV_V(BASE, BITS, LMUL) PromoteTo( \
1105 HWY_RVV_D(CHAR, BITS, LMUL) d, HWY_RVV_V(BASE_IN, BITS_IN, LMUL_IN) v) { \
1106 return OP##CHAR##BITS##LMUL(v, Lanes(d)); \
// Instantiates HWY_RVV_PROMOTE for each output LMUL (m1, m2, m4, m8) with the
// input LMUL set to half the output: mf2, m1, m2, m4.
1109 #define HWY_RVV_PROMOTE_X2(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN) \
1110 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf2) \
1111 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, m1) \
1112 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, m2) \
1113 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m4)
// Instantiates HWY_RVV_PROMOTE for each output LMUL (m1, m2, m4, m8) with the
// input LMUL set to a quarter of the output: mf4, mf2, m1, m2.
1115 #define HWY_RVV_PROMOTE_X4(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN) \
1116 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf4) \
1117 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, mf2) \
1118 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, m1) \
1119 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m2)
1141 #undef HWY_RVV_PROMOTE_X4
1142 #undef HWY_RVV_PROMOTE_X2
1143 #undef HWY_RVV_PROMOTE
1167 const Rebind<uint16_t, decltype(d)> du16;
1177 return vnclipu_wx_u16m1(v, 0,
Lanes(d));
1180 return vnclipu_wx_u16m2(v, 0,
Lanes(d));
1183 return vnclipu_wx_u16m4(v, 0,
Lanes(d));
1187 return vnclipu_wx_u8m1(v, 0,
Lanes(d));
1190 return vnclipu_wx_u8m2(v, 0,
Lanes(d));
1193 return vnclipu_wx_u8m4(v, 0,
Lanes(d));
1228 return vnclipu_wx_u8m1(vnclipu_wx_u16m2(v, 0, avl), 0, avl);
1232 return vnclipu_wx_u8m2(vnclipu_wx_u16m4(v, 0, avl), 0, avl);
1238 return vnclip_wx_i8m1(v, 0,
Lanes(d));
1241 return vnclip_wx_i8m2(v, 0,
Lanes(d));
1244 return vnclip_wx_i8m4(v, 0,
Lanes(d));
1248 return vnclip_wx_i16m1(v, 0,
Lanes(d));
1251 return vnclip_wx_i16m2(v, 0,
Lanes(d));
1254 return vnclip_wx_i16m4(v, 0,
Lanes(d));
1268 return vfncvt_rod_f_f_w_f16m1(v,
Lanes(d));
1271 return vfncvt_rod_f_f_w_f16m2(v,
Lanes(d));
1274 return vfncvt_rod_f_f_w_f16m4(v,
Lanes(d));
1279 return vfncvt_rod_f_f_w_f32m1(v,
Lanes(d));
1282 return vfncvt_rod_f_f_w_f32m2(v,
Lanes(d));
1285 return vfncvt_rod_f_f_w_f32m4(v,
Lanes(d));
1289 return vfncvt_rtz_x_f_w_i32m1(v,
Lanes(d));
1292 return vfncvt_rtz_x_f_w_i32m2(v,
Lanes(d));
1295 return vfncvt_rtz_x_f_w_i32m4(v,
Lanes(d));
1302 const Rebind<uint32_t, decltype(d)> du32;
1308 #define HWY_RVV_CONVERT(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
1310 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1311 ConvertTo(HWY_RVV_D(CHAR, SEW, LMUL) d, HWY_RVV_V(int, SEW, LMUL) v) { \
1312 return vfcvt_f_x_v_f##SEW##LMUL(v, Lanes(d)); \
1315 HWY_API HWY_RVV_V(int, SEW, LMUL) \
1316 ConvertTo(HWY_RVV_D(i, SEW, LMUL) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1317 return vfcvt_rtz_x_f_v_i##SEW##LMUL(v, Lanes(d)); \
1320 HWY_API HWY_RVV_V(int, SEW, LMUL) NearestInt(HWY_RVV_V(BASE, SEW, LMUL) v) { \
1321 return vfcvt_x_f_v_i##SEW##LMUL(v, HWY_RVV_AVL(SEW, SHIFT)); \
1327 #undef HWY_RVV_CONVERT
1330 template <
typename T,
size_t N,
class FromV, HWY_IF_LE128(T, N)>
1341 template <
typename T,
size_t N>
1344 return HWY_MIN(16 /
sizeof(T), N);
1347 template <
class D,
class V>
1349 using T = MakeUnsigned<TFromD<D>>;
1350 return AndS(iota0,
static_cast<T
>(~(
LanesPerBlock(d) - 1)));
1353 template <
size_t kLanes,
class D>
1358 const auto idx_mod = AndS(
Iota0(du), kLanesPerBlock - 1);
1359 return LtS(
BitCast(di, idx_mod),
static_cast<TFromD<decltype(di)
>>(kLanes));
1363 #define HWY_RVV_SLIDE(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
1364 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1365 NAME(HWY_RVV_V(BASE, SEW, LMUL) dst, HWY_RVV_V(BASE, SEW, LMUL) src, \
1367 return v##OP##_vx_##CHAR##SEW##LMUL(dst, src, lanes, \
1368 HWY_RVV_AVL(SEW, SHIFT)); \
1374 #undef HWY_RVV_SLIDE
1395 const auto lo_down = detail::SlideDown(lo, lo,
Lanes(
DFromV<V>()) / 2);
1403 const auto hi_up = detail::SlideUp(hi, hi,
Lanes(
DFromV<V>()) / 2);
1404 const auto lo_down = detail::SlideDown(lo, lo,
Lanes(
DFromV<V>()) / 2);
1415 using D = DFromV<V>;
1431 #define HWY_RVV_TRUNC(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
1432 HWY_API HWY_RVV_V(BASE, SEW, HALF) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
1433 return v##OP##_v_##CHAR##SEW##LMUL##_##CHAR##SEW##HALF(v); \
1443 #undef HWY_RVV_TRUNC
1446 template <
class D, HWY_IF_NOT_LANE_SIZE_D(D, 8)>
1454 template <
class D, HWY_IF_LANE_SIZE_D(D, 8)>
1456 const Repartition<uint32_t, D> d32;
1460 template <
class D, HWY_IF_NOT_LANE_SIZE_D(D, 8)>
1469 #define HWY_RVV_GET_LANE(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
1471 HWY_API HWY_RVV_T(BASE, SEW) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
1472 return v##OP##_s_##CHAR##SEW##LMUL##_##CHAR##SEW(v); \
1477 #undef HWY_RVV_GET_LANE
1482 const RebindToUnsigned<DFromV<V>> du;
1489 template <
class D,
class DU = RebindToUn
signed<D>>
1491 #if HWY_IS_DEBUG_BUILD
1492 const size_t N =
Lanes(d);
1493 for (
size_t i = 0; i < N; ++i) {
1499 return Load(DU(), idx);
1504 #define HWY_RVV_TABLE(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
1505 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1506 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(uint, SEW, LMUL) idx) { \
1507 return v##OP##_vv_##CHAR##SEW##LMUL(v, idx, HWY_RVV_AVL(SEW, SHIFT)); \
1511 #undef HWY_RVV_TABLE
1517 using TU =
TFromD<decltype(du)>;
1518 const size_t N =
Lanes(du);
1525 #define HWY_RVV_COMPRESS(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
1527 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1528 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_M(MLEN) mask) { \
1529 return v##OP##_vm_##CHAR##SEW##LMUL(mask, v, v, HWY_RVV_AVL(SEW, SHIFT)); \
1536 #undef HWY_RVV_COMPRESS
1539 template <
class V,
class M,
class D>
1549 template <
size_t kBytes,
class D,
class V = VFromD<D>>
1552 const auto hi8 =
BitCast(d8, hi);
1553 const auto lo8 =
BitCast(d8, lo);
1554 const auto hi_up = detail::SlideUp(hi8, hi8, 16 - kBytes);
1555 const auto lo_down = detail::SlideDown(lo8, lo8, kBytes);
1561 template <
size_t kLanes,
class D,
class V = VFromD<D>>
1563 constexpr
size_t kLanesUp = 16 /
sizeof(
TFromV<V>) - kLanes;
1564 const auto hi_up = detail::SlideUp(hi, hi, kLanesUp);
1565 const auto lo_down = detail::SlideDown(lo, lo, kLanes);
1566 const auto is_lo = detail::FirstNPerBlock<kLanesUp>(d);
1574 static_assert(
sizeof(
TFromD<decltype(d)>) == 4,
"Defined for 32-bit types");
1576 const auto v64 =
BitCast(du64, v);
1577 return BitCast(d,
Or(ShiftRight<32>(v64), ShiftLeft<32>(v64)));
1584 static_assert(
sizeof(
TFromD<decltype(d)>) == 4,
"Defined for 32-bit types");
1585 return CombineShiftRightLanes<3>(d, v, v);
1592 static_assert(
sizeof(
TFromD<decltype(d)>) == 4,
"Defined for 32-bit types");
1593 return CombineShiftRightLanes<1>(d, v, v);
1600 static_assert(
sizeof(
TFromD<decltype(d)>) == 4,
"Defined for 32-bit types");
1601 return CombineShiftRightLanes<2>(d, v, v);
1608 static_assert(
sizeof(
TFromD<decltype(d)>) == 8,
"Defined for 64-bit types");
1609 return CombineShiftRightLanes<1>(d, v, v);
1620 template <
class V,
class VI>
1625 const auto idx8 =
Add(
BitCast(d8, idx), offsets128);
1629 template <
class V,
class VI>
1640 template <
int kLane,
class V>
1644 static_assert(0 <= kLane && kLane < kLanesPerBlock,
"Invalid lane");
1647 idx = detail::AddS(idx, kLane);
1654 template <
size_t kLanes,
class D,
class V = VFromD<D>>
1657 const auto shifted = detail::SlideUp(v, v, kLanes);
1660 const auto idx_mod = detail::AndS(
detail::Iota0(di), kLanesPerBlock - 1);
1661 const auto clear =
Lt(
BitCast(di, idx_mod),
Set(di, kLanes));
1665 template <
size_t kLanes,
class V>
1667 return ShiftLeftLanes<kLanes>(DFromV<V>(), v);
1672 template <
int kBytes,
class V>
1678 template <
int kBytes,
class V>
1680 return ShiftLeftBytes<kBytes>(DFromV<V>(), v);
1684 template <
size_t kLanes,
typename T,
size_t N,
class V = VFromD<Simd<T, N>>>
1688 if (N <= 16 /
sizeof(T)) {
1692 const auto shifted = detail::SlideDown(v, v, kLanes);
1695 const auto idx_mod = detail::AndS(
detail::Iota0(di), kLanesPerBlock - 1);
1696 const auto keep =
Lt(
BitCast(di, idx_mod),
Set(di, kLanesPerBlock - kLanes));
1701 template <
int kBytes,
class D,
class V = VFromD<D>>
1710 template <
class D,
class V>
1712 static_assert(
IsSame<TFromD<D>, TFromV<V>>(),
"D/V mismatch");
1716 const auto idx_mod = ShiftRight<1>(detail::AndS(i, kLanesPerBlock - 1));
1718 const auto is_even =
Eq(detail::AndS(i, 1),
Zero(du));
1730 template <
class D,
class V>
1736 const auto idx_mod = ShiftRight<1>(detail::AndS(i, kLanesPerBlock - 1));
1738 const auto idx = detail::AddS(idx_lower, kLanesPerBlock / 2);
1739 const auto is_even =
Eq(detail::AndS(i, 1),
Zero(du));
1746 template <
class V,
class DW = RepartitionToW
ide<DFromV<V>>>
1748 const RepartitionToNarrow<DW> dn;
1749 static_assert(
IsSame<
TFromD<decltype(dn)>, TFromV<V>>(),
"D/V mismatch");
1753 template <
class V,
class DW = RepartitionToW
ide<DFromV<V>>>
1759 template <
class DW,
class V>
1761 const RepartitionToNarrow<DW> dn;
1762 static_assert(
IsSame<
TFromD<decltype(dn)>, TFromV<V>>(),
"D/V mismatch");
1769 #define HWY_RVV_REDUCE(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
1770 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1771 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, m1) v0) { \
1772 return Set(HWY_RVV_D(CHAR, SEW, LMUL)(), \
1773 GetLane(v##OP##_vs_##CHAR##SEW##LMUL##_##CHAR##SEW##m1( \
1774 v0, v, v0, HWY_RVV_AVL(SEW, SHIFT)))); \
1787 return detail::RedSum(v, v0);
1801 const auto neutral =
Set(d1, HighestValue<T>());
1802 return detail::RedMin(v, neutral);
1816 const auto neutral =
Set(d1, LowestValue<T>());
1817 return detail::RedMax(v, neutral);
1820 #undef HWY_RVV_REDUCE
1828 const auto loaded =
Load(d, p);
1831 const auto idx = detail::AndS(
detail::Iota0(d), kLanesPerBlock - 1);
1836 #define HWY_RVV_STORE_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP) \
1838 HWY_API size_t StoreMaskBits(HWY_RVV_M(MLEN) m, uint8_t* bits) { \
1841 const size_t num_bytes = (Lanes(d8) + MLEN - 1) / MLEN; \
1848 template <class D> \
1849 HWY_API size_t StoreMaskBits(D , HWY_RVV_M(MLEN) m, \
1851 return StoreMaskBits(m, bits); \
1854 #undef HWY_RVV_STORE_MASK_BITS
1859 template <
class D, HWY_IF_NOT_LANE_SIZE_D(D, 1)>
1865 template <
class D, HWY_IF_LANE_SIZE_D(D, 1)>
1867 const auto zero =
Zero(d);
1868 const auto one =
Set(d, 1);
1869 return Eq(detail::SlideUp(one, zero, n), one);
1874 template <
class V, HWY_IF_SIGNED_V(V)>
1880 #define HWY_RVV_RETV_ARGV2(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, \
1882 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
1883 return v##OP##_vv_##CHAR##SEW##LMUL(v, v, HWY_RVV_AVL(SEW, SHIFT)); \
1890 template <
class V, HWY_IF_SIGNED_V(V)>
1897 #undef HWY_RVV_RETV_ARGV2
1927 const auto int_f =
ConvertTo(df, integer);
1939 const auto int_f =
ConvertTo(df, integer);
1948 const auto ret =
Round(v);
1957 const auto ret =
Round(v);
1964 template <
class D, HWY_IF_UNSIGNED_D(D)>
1965 HWY_API VFromD<D>
Iota(
const D d, TFromD<D> first) {
1969 template <
class D, HWY_IF_SIGNED_D(D)>
1970 HWY_API VFromD<D>
Iota(
const D d, TFromD<D> first) {
1971 const RebindToUnsigned<D> du;
1975 template <
class D, HWY_IF_FLOAT_D(D)>
1976 HWY_API VFromD<D>
Iota(
const D d, TFromD<D> first) {
1977 const RebindToUnsigned<D> du;
1978 const RebindToSigned<D> di;
1986 #define HWY_RVV_SLIDE1(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP) \
1987 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
1988 return v##OP##_vx_##CHAR##SEW##LMUL(v, 0, HWY_RVV_AVL(SEW, SHIFT)); \
1995 #undef HWY_RVV_SLIDE1
1998 template <
class V, HWY_IF_LANE_SIZE_V(V, 4)>
2000 const auto lo =
Mul(a, b);
2007 template <
class V, HWY_IF_LANE_SIZE_V(V, 8)>
2011 return OddEven(detail::Slide1Up(hi), lo);
2014 template <
class V, HWY_IF_LANE_SIZE_V(V, 8)>
2018 return OddEven(hi, detail::Slide1Down(lo));
2023 template <
size_t N,
class DF = Simd<float, N / 2>>
2028 const VFromD<decltype(du32)> b_in_even = ShiftRight<16>(
BitCast(du32, b));
2034 template <
size_t N,
class DU16 = Simd<u
int16_t, N * 2>>
2037 const VFromD<decltype(df32)> sum0,
2038 VFromD<decltype(df32)>& sum1)
2039 ->
VFromD<decltype(df32)> {
2042 using VU32 =
VFromD<decltype(du32)>;
// Cleanup: removes helper macro definitions from this point on.
// NOTE(review): the HWY_IF_*_V macros are not defined in the visible part of
// this file - confirm they are defined here rather than in a shared header
// that other code still relies on.
2054 #undef HWY_IF_FLOAT_V
2055 #undef HWY_IF_SIGNED_V
2056 #undef HWY_IF_UNSIGNED_V
// Iteration tables.
// NOTE(review): HWY_RVV_FOREACH_F16 is defined earlier in this file but has
// no matching #undef in this list; HWY_RVV_T/D/V/M and HWY_RVV_AVL are also
// absent from the visible portion. Confirm whether that is intentional or
// whether further #undef lines follow beyond this view.
2058 #undef HWY_RVV_FOREACH
2059 #undef HWY_RVV_FOREACH_08
2060 #undef HWY_RVV_FOREACH_16
2061 #undef HWY_RVV_FOREACH_32
2062 #undef HWY_RVV_FOREACH_64
2063 #undef HWY_RVV_FOREACH_B
2064 #undef HWY_RVV_FOREACH_F
2065 #undef HWY_RVV_FOREACH_F32
2066 #undef HWY_RVV_FOREACH_F64
2067 #undef HWY_RVV_FOREACH_I
2068 #undef HWY_RVV_FOREACH_I08
2069 #undef HWY_RVV_FOREACH_I16
2070 #undef HWY_RVV_FOREACH_I32
2071 #undef HWY_RVV_FOREACH_I64
2072 #undef HWY_RVV_FOREACH_U
2073 #undef HWY_RVV_FOREACH_U08
2074 #undef HWY_RVV_FOREACH_U16
2075 #undef HWY_RVV_FOREACH_U32
2076 #undef HWY_RVV_FOREACH_U64
2077 #undef HWY_RVV_FOREACH_UI
2078 #undef HWY_RVV_FOREACH_UI16
2079 #undef HWY_RVV_FOREACH_UI32
2080 #undef HWY_RVV_FOREACH_UI64
// Generic one-, vector-scalar- and two-vector-argument wrapper generators.
2082 #undef HWY_RVV_RETV_ARGV
2083 #undef HWY_RVV_RETV_ARGVS
2084 #undef HWY_RVV_RETV_ARGVV
#define HWY_RESTRICT
Definition: base.h:58
#define HWY_API
Definition: base.h:117
#define HWY_MIN(a, b)
Definition: base.h:122
#define HWY_INLINE
Definition: base.h:59
#define HWY_DASSERT(condition)
Definition: base.h:163
HWY_INLINE VFromD< DU > BitCastToUnsigned(V v)
Definition: rvv-inl.h:393
HWY_INLINE Mask128< float, N > UseInt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:2811
RoundingModes
Definition: rvv-inl.h:1913
@ kUp
Definition: rvv-inl.h:1913
@ kNear
Definition: rvv-inl.h:1913
@ kTrunc
Definition: rvv-inl.h:1913
@ kDown
Definition: rvv-inl.h:1913
constexpr size_t LanesPerBlock(Simd< T, N >)
Definition: arm_sve-inl.h:1472
HWY_INLINE V OffsetsOf128BitBlocks(const D d, const V iota0)
Definition: arm_sve-inl.h:1478
HWY_INLINE Mask128< T, N > MaskFromVec(hwy::SizeTag< 1 >, const Vec128< T, N > v)
Definition: x86_128-inl.h:1278
svbool_t FirstNPerBlock(D d)
Definition: arm_sve-inl.h:1484
HWY_INLINE Vec128< uint8_t, N > BitCastFromByte(Simd< uint8_t, N >, Vec128< uint8_t, N > v)
Definition: arm_neon-inl.h:578
HWY_INLINE VFromD< DU > Iota0(const D)
Definition: rvv-inl.h:412
HWY_INLINE Vec128< uint8_t, N > BitCastToByte(Vec128< uint8_t, N > v)
Definition: arm_neon-inl.h:551
HWY_INLINE Vu16m1 DemoteTo(Du16m1 d, const Vu32m2 v)
Definition: rvv-inl.h:1176
HWY_API Vec128< T, N > CopySign(const Vec128< T, N > magn, const Vec128< T, N > sign)
Definition: arm_neon-inl.h:1573
decltype(MaskFromVec(Zero(D()))) MFromD
Definition: rvv-inl.h:797
svuint16_t Set(Simd< bfloat16_t, N > d, bfloat16_t arg)
Definition: arm_sve-inl.h:299
HWY_API Vec128< uint32_t, N > TableLookupLanes(const Vec128< uint32_t, N > v, const Indices128< uint32_t, N > idx)
Definition: arm_neon-inl.h:3342
HWY_API Vec128< T, N > ShiftRightBytes(Simd< T, N >, Vec128< T, N > v)
Definition: arm_neon-inl.h:3064
HWY_API Vec128< float > ApproximateReciprocal(const Vec128< float > v)
Definition: arm_neon-inl.h:1167
HWY_API uint8_t GetLane(const Vec128< uint8_t, 16 > v)
Definition: arm_neon-inl.h:744
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5035
HWY_API Vec128< uint64_t > InterleaveLower(const Vec128< uint64_t > a, const Vec128< uint64_t > b)
Definition: arm_neon-inl.h:3435
HWY_API Vec128< float, N > Round(const Vec128< float, N > v)
Definition: arm_neon-inl.h:2829
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5027
HWY_API Vec128< T, N > ZeroExtendVector(Simd< T, N > d, Vec128< T, N/2 > lo)
Definition: arm_neon-inl.h:3629
HWY_API V128 CombineShiftRightBytes(Full128< T > d, V128 hi, V128 lo)
Definition: arm_neon-inl.h:2949
HWY_API Vec128< T, N > ShiftLeftLanes(Simd< T, N > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:3052
HWY_API Vec128< T, N > MinOfLanes(Simd< T, N >, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4207
HWY_API auto Gt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5040
HWY_API Mask128< T, N > FirstN(const Simd< T, N > d, size_t num)
Definition: arm_neon-inl.h:1806
HWY_API Vec128< T, N > Load(Simd< T, N > d, const T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2152
HWY_API Vec128< float, N > MulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1232
HWY_API Vec128< T, N > LoadDup128(Simd< T, N > d, const T *const HWY_RESTRICT p)
Definition: arm_neon-inl.h:2164
HWY_API Vec128< int8_t > Abs(const Vec128< int8_t > v)
Definition: arm_neon-inl.h:1529
HWY_API Vec128< T, N > GatherOffset(const Simd< T, N > d, const T *HWY_RESTRICT base, const Vec128< Offset, N > offset)
Definition: arm_neon-inl.h:4054
Repartition< MakeWide< TFromD< D > >, D > RepartitionToWide
Definition: shared-inl.h:158
HWY_API void Stream(const Vec128< T, N > v, Simd< T, N > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2352
V Shl(V a, V b)
Definition: arm_neon-inl.h:5018
HWY_API Vec128< T, N > VecFromMask(const Mask128< T, N > v)
Definition: arm_neon-inl.h:1607
HWY_API auto Ge(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5044
HWY_API Vec128< float > AbsDiff(const Vec128< float > a, const Vec128< float > b)
Definition: arm_neon-inl.h:1206
HWY_API Vec128< T, N > ConcatUpperUpper(const Simd< T, N > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:3681
HWY_API Vec128< uint64_t, N > Min(const Vec128< uint64_t, N > a, const Vec128< uint64_t, N > b)
Definition: arm_neon-inl.h:1879
HWY_API Vec128< uint64_t, N > Max(const Vec128< uint64_t, N > a, const Vec128< uint64_t, N > b)
Definition: arm_neon-inl.h:1917
HWY_API Indices128< T, N > SetTableIndices(Simd< T, N > d, const int32_t *idx)
Definition: arm_neon-inl.h:3323
HWY_API Mask128< T, N > MaskFromVec(const Vec128< T, N > v)
Definition: arm_neon-inl.h:1600
Rebind< MakeUnsigned< TFromD< D > >, D > RebindToUnsigned
Definition: shared-inl.h:149
constexpr HWY_API size_t Lanes(Simd< T, N >)
Definition: arm_sve-inl.h:226
HWY_INLINE Vec128< uint64_t > MulOdd(Vec128< uint64_t > a, Vec128< uint64_t > b)
Definition: arm_neon-inl.h:3947
HWY_API Vec128< T, N > ConcatLowerUpper(const Simd< T, N > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:3726
HWY_API Vec128< T > Shuffle01(const Vec128< T > v)
Definition: arm_neon-inl.h:3389
HWY_API Vec1< uint8_t > SaturatedAdd(const Vec1< uint8_t > a, const Vec1< uint8_t > b)
Definition: scalar-inl.h:422
HWY_API Vec128< T, N > ShiftRightLanes(Simd< T, N > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:3070
Vec128< T, N > Iota(const Simd< T, N > d, const T2 first)
Definition: arm_neon-inl.h:734
HWY_API Vec128< int64_t > MulEven(Vec128< int32_t > a, Vec128< int32_t > b)
Definition: arm_neon-inl.h:3907
HWY_API Vec128< T, N/2 > LowerHalf(const Vec128< T, N > v)
Definition: arm_neon-inl.h:2903
typename D::Twice Twice
Definition: shared-inl.h:168
Rebind< MakeSigned< TFromD< D > >, D > RebindToSigned
Definition: shared-inl.h:147
HWY_API Vec128< uint16_t, 4 > DemoteTo(Simd< uint16_t, 4 >, const Vec128< int32_t > v)
Definition: arm_neon-inl.h:2546
HWY_API Mask128< TTo, N > RebindMask(Simd< TTo, N > dto, Mask128< TFrom, N > m)
Definition: arm_neon-inl.h:1619
HWY_API Vec128< T, N > Undefined(Simd< T, N >)
Definition: arm_neon-inl.h:724
HWY_API intptr_t FindFirstTrue(const Simd< T, N >, const Mask128< T, N > mask)
Definition: arm_neon-inl.h:4520
HWY_API size_t CountTrue(Full128< T >, const Mask128< T > mask)
Definition: arm_neon-inl.h:4509
HWY_API Vec128< float > ConvertTo(Full128< float >, const Vec128< int32_t > v)
Definition: arm_neon-inl.h:2739
HWY_API Vec128< T, N > IfThenElseZero(const Mask128< T, N > mask, const Vec128< T, N > yes)
Definition: arm_neon-inl.h:1642
HWY_API V Add(V a, V b)
Definition: arm_neon-inl.h:5000
HWY_API Vec128< T > Shuffle0321(const Vec128< T > v)
Definition: arm_neon-inl.h:3395
HWY_API bool AllFalse(const Full128< T > d, const Mask128< T > m)
Definition: arm_neon-inl.h:4538
HWY_API void StoreInterleaved3(const Vec128< uint8_t > v0, const Vec128< uint8_t > v1, const Vec128< uint8_t > v2, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:4829
HWY_API Vec128< float, N > Floor(const Vec128< float, N > v)
Definition: arm_neon-inl.h:2860
HWY_API VFromD< DW > ZipUpper(DW dw, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:3538
HWY_API Vec128< float, N > MulSub(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > sub)
Definition: arm_neon-inl.h:1288
HWY_API Vec128< T, N > CopySignToAbs(const Vec128< T, N > abs, const Vec128< T, N > sign)
Definition: arm_neon-inl.h:1581
HWY_API Vec128< T, N > ZeroIfNegative(Vec128< T, N > v)
Definition: arm_neon-inl.h:1655
HWY_API void StoreU(const Vec128< uint8_t > v, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2170
HWY_API Vec128< float, N > Ceil(const Vec128< float, N > v)
Definition: arm_neon-inl.h:2846
Simd< T,(kShift<=0) ?(HWY_LANES(T) > >(-kShift)) :0 > Full
Definition: arm_sve-inl.h:36
HWY_API Vec1< uint8_t > AverageRound(const Vec1< uint8_t > a, const Vec1< uint8_t > b)
Definition: scalar-inl.h:475
HWY_API Vec1< T > ShiftRight(const Vec1< T > v)
Definition: scalar-inl.h:325
HWY_API Mask128< uint64_t, N > TestBit(Vec128< uint64_t, N > v, Vec128< uint64_t, N > bit)
Definition: arm_neon-inl.h:1827
HWY_API Vec128< T, N > ShiftLeftBytes(Simd< T, N >, Vec128< T, N > v)
Definition: arm_neon-inl.h:3041
HWY_API Vec128< uint8_t > LoadU(Full128< uint8_t >, const uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:1953
HWY_API Vec128< int16_t > MulHigh(const Vec128< int16_t > a, const Vec128< int16_t > b)
Definition: arm_neon-inl.h:1126
HWY_API Vec1< uint8_t > SaturatedSub(const Vec1< uint8_t > a, const Vec1< uint8_t > b)
Definition: scalar-inl.h:449
HWY_API Vec128< uint8_t > Combine(Full128< uint8_t >, Vec128< uint8_t, 8 > hi, Vec128< uint8_t, 8 > lo)
Definition: arm_neon-inl.h:3566
HWY_API Vec128< uint16_t > PromoteTo(Full128< uint16_t >, const Vec128< uint8_t, 8 > v)
Definition: arm_neon-inl.h:2362
HWY_API Vec128< T, N > And(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1384
HWY_API Vec128< T, N > GatherIndex(const Simd< T, N > d, const T *HWY_RESTRICT base, const Vec128< Index, N > index)
Definition: arm_neon-inl.h:4071
HWY_API Vec128< T, N > MaskedLoad(Mask128< T, N > m, Simd< T, N > d, const T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2157
HWY_API Vec128< T, N > BitCast(Simd< T, N > d, Vec128< FromT, N *sizeof(T)/sizeof(FromT)> v)
Definition: arm_neon-inl.h:687
HWY_API void ScatterIndex(Vec128< T, N > v, Simd< T, N > d, T *HWY_RESTRICT base, const Vec128< Index, N > index)
Definition: arm_neon-inl.h:4036
HWY_API V Sub(V a, V b)
Definition: arm_neon-inl.h:5004
constexpr HWY_INLINE size_t MLenFromD(Simd< T, N >)
Definition: rvv-inl.h:38
HWY_API Vec128< T > Reverse(Full128< T >, const Vec128< T > v)
Definition: arm_neon-inl.h:3362
HWY_API Vec128< T, N > ConcatLowerLower(const Simd< T, N > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:3637
typename D::template Rebind< T > Rebind
Definition: shared-inl.h:144
HWY_API V InterleaveUpper(Simd< T, N >, V a, V b)
Definition: arm_neon-inl.h:3511
HWY_API Vec128< T, N > IfThenZeroElse(const Mask128< T, N > mask, const Vec128< T, N > no)
Definition: arm_neon-inl.h:1649
HWY_API Vec128< uint32_t, 2 > Shuffle2301(const Vec128< uint32_t, 2 > v)
Definition: arm_neon-inl.h:1698
HWY_API Vec128< T, N > Xor(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1430
HWY_API Vec1< T > ShiftLeft(const Vec1< T > v)
Definition: scalar-inl.h:319
HWY_API auto Le(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5049
decltype(detail::DeduceD()(V())) DFromV
Definition: arm_neon-inl.h:532
HWY_API Vec128< int32_t, N > NearestInt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:2890
HWY_API Vec128< T > Not(const Vec128< T > v)
Definition: arm_neon-inl.h:1366
HWY_API Vec128< float, N > NegMulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1266
V Shr(V a, V b)
Definition: arm_neon-inl.h:5022
decltype(Zero(D())) VFromD
Definition: arm_neon-inl.h:720
HWY_API Vec128< T, N > OddEven(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:3844
HWY_API Vec128< T, N > MaxOfLanes(Simd< T, N >, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4211
Neg(const Vec128< int64_t, 1 > v)
Definition: arm_neon-inl.h:866
HWY_API Vec128< uint8_t, 4 > U8FromU32(const Vec128< uint32_t > v)
Definition: arm_neon-inl.h:2699
HWY_API Vec128< uint16_t > Broadcast(const Vec128< uint16_t > v)
Definition: arm_neon-inl.h:3235
HWY_API Vec128< T, N > ConcatUpperLower(Simd< T, N > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:3752
HWY_API Vec128< float, N > Trunc(const Vec128< float, N > v)
Definition: arm_neon-inl.h:2818
HWY_API VFromD< DW > ZipLower(Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:3527
typename D::template Repartition< T > Repartition
Definition: shared-inl.h:155
HWY_API Vec128< T, N > SumOfLanes(Simd< T, N >, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4203
HWY_API void ScatterOffset(Vec128< T, N > v, Simd< T, N > d, T *HWY_RESTRICT base, const Vec128< Offset, N > offset)
Definition: arm_neon-inl.h:4019
HWY_API Vec128< bfloat16_t, 2 *N > ReorderDemote2To(Simd< bfloat16_t, 2 *N > dbf16, Vec128< float, N > a, Vec128< float, N > b)
Definition: arm_neon-inl.h:2665
HWY_API size_t CompressStore(Vec128< T, N > v, const Mask128< T, N > mask, Simd< T, N > d, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:4802
HWY_API Vec128< T > Shuffle2103(const Vec128< T > v)
Definition: arm_neon-inl.h:3401
HWY_API auto Ne(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5031
HWY_API Vec128< float, N > Sqrt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:1348
HWY_API Vec128< TI > TableLookupBytes(const Vec128< T > bytes, const Vec128< TI > from)
Definition: arm_neon-inl.h:3957
HWY_API void StoreInterleaved4(const Vec128< uint8_t > v0, const Vec128< uint8_t > v1, const Vec128< uint8_t > v2, const Vec128< uint8_t > v3, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:4864
HWY_API Vec128< T, N > AndNot(const Vec128< T, N > not_mask, const Vec128< T, N > mask)
Definition: arm_neon-inl.h:1398
HWY_API V Div(V a, V b)
Definition: arm_neon-inl.h:5013
HWY_API Vec128< uint8_t, 8 > UpperHalf(Simd< uint8_t, 8 >, const Vec128< uint8_t > v)
Definition: arm_neon-inl.h:3096
HWY_API Vec128< T > Shuffle0123(const Vec128< T > v)
Definition: arm_neon-inl.h:3407
HWY_API V Mul(V a, V b)
Definition: arm_neon-inl.h:5009
HWY_API Vec128< T, N > BroadcastSignBit(const Vec128< T, N > v)
Definition: arm_neon-inl.h:1590
TFromD< DFromV< V > > TFromV
Definition: arm_neon-inl.h:535
HWY_API Vec128< T > Shuffle1032(const Vec128< T > v)
Definition: arm_neon-inl.h:3385
HWY_API Vec128< float > ApproximateReciprocalSqrt(const Vec128< float > v)
Definition: arm_neon-inl.h:1320
HWY_API V Trunc(const V v)
Definition: rvv-inl.h:1934
HWY_API Vec128< T, N > Compress(Vec128< T, N > v, const Mask128< T, N > mask)
Definition: arm_neon-inl.h:4780
HWY_API Vec128< T, N > Zero(Simd< T, N > d)
Definition: arm_neon-inl.h:710
HWY_API V CombineShiftRightLanes(const D d, const V hi, V lo)
Definition: rvv-inl.h:1562
HWY_API void Store(Vec128< T, N > v, Simd< T, N > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2343
typename D::T TFromD
Definition: shared-inl.h:140
HWY_API Vec128< float, N > ReorderWidenMulAccumulate(Simd< float, N > df32, Vec128< bfloat16_t, 2 *N > a, Vec128< bfloat16_t, 2 *N > b, const Vec128< float, N > sum0, Vec128< float, N > &sum1)
Definition: arm_neon-inl.h:3545
HWY_API VI TableLookupBytesOr0(const V bytes, const VI from)
Definition: arm_neon-inl.h:4012
HWY_API Vec128< T, N > Or(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1419
HWY_API Vec128< float, N > NegMulSub(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > sub)
Definition: arm_neon-inl.h:1296
HWY_API Vec1< T > IfThenElse(const Mask1< T > mask, const Vec1< T > yes, const Vec1< T > no)
Definition: scalar-inl.h:263
Definition: aligned_allocator.h:23
constexpr T MantissaEnd()
Definition: base.h:386
constexpr HWY_API bool IsSame()
Definition: base.h:260
typename detail::Relations< T >::Signed MakeSigned
Definition: base.h:523
#define HWY_RVV_SHIFT_VV(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:561
#define HWY_RVV_SLIDE(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1363
#define HWY_RVV_PROMOTE_X2(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)
Definition: rvv-inl.h:1109
#define HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP)
Definition: rvv-inl.h:105
#define HWY_SPECIALIZE(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:181
#define HWY_RVV_RETM_ARGVV(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:674
#define HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP)
Definition: rvv-inl.h:117
#define HWY_RVV_LOAD(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:891
#define HWY_RVV_IOTA(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:403
#define HWY_RVV_VEC_FROM_MASK(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:809
#define HWY_RVV_SCATTER(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:976
#define HWY_RVV_MASKED_LOAD(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:931
#define HWY_RVV_PROMOTE_X4(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)
Definition: rvv-inl.h:1115
#define HWY_RVV_FOREACH_I(X_MACRO, NAME, OP)
Definition: rvv-inl.h:129
#define HWY_RVV_TABLE(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1504
#define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP)
Definition: rvv-inl.h:115
#define HWY_RVV_RETV_ARGV2(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1880
#define HWY_RVV_SLIDE1(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1986
#define HWY_RVV_FOREACH(X_MACRO, NAME, OP)
Definition: rvv-inl.h:164
#define HWY_RVV_CAST_U(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:334
#define HWY_RVV_FOREACH_UI16(X_MACRO, NAME, OP)
Definition: rvv-inl.h:147
#define HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP)
Definition: rvv-inl.h:111
#define HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP)
Definition: rvv-inl.h:107
#define HWY_RVV_COMPRESS(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1525
#define HWY_RVV_SHIFT(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:540
#define HWY_RVV_GET_LANE(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1469
#define HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP)
Definition: rvv-inl.h:101
#define HWY_RVV_REDUCE(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1769
#define HWY_RVV_CONVERT(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1308
#define HWY_RVV_FOREACH_F(X_MACRO, NAME, OP)
Definition: rvv-inl.h:141
#define HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP)
Definition: rvv-inl.h:99
#define HWY_RVV_FOREACH_B(X_MACRO, NAME, OP)
Definition: rvv-inl.h:57
#define HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP)
Definition: rvv-inl.h:151
#define HWY_RVV_CAST_IF(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:344
#define HWY_RVV_RETM_ARGM(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:734
#define HWY_RVV_CAST_I8(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:322
#define HWY_RVV_LANES(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:207
#define HWY_RVV_ALL_TRUE(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:865
#define HWY_RVV_FOREACH_UI(X_MACRO, NAME, OP)
Definition: rvv-inl.h:160
#define HWY_RVV_RETM_ARGVS(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:683
#define HWY_RVV_TRUNC(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1431
#define HWY_RVV_CAST_U8(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:313
#define HWY_RVV_FMA(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:647
#define HWY_RVV_RETM_ARGMM(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:746
#define HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP)
Definition: rvv-inl.h:95
#define HWY_RVV_IF_THEN_ELSE(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:765
#define HWY_RVV_SET(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:252
#define HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP)
Definition: rvv-inl.h:97
#define HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP)
Definition: rvv-inl.h:109
#define HWY_RVV_FOREACH_UI64(X_MACRO, NAME, OP)
Definition: rvv-inl.h:155
#define HWY_RVV_STORE_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1836
#define HWY_RVV_GATHER(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1012
#define HWY_RVV_FIND_FIRST_TRUE(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:847
#define HWY_RVV_UNDEFINED(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:292
#define HWY_RVV_RET_ARGVDP(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:943
#define HWY_RVV_RETV_ARGVV(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:241
#define HWY_RVV_FOREACH_U(X_MACRO, NAME, OP)
Definition: rvv-inl.h:123
#define HWY_RVV_RETV_ARGV(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:226
#define HWY_RVV_SHIFT_II(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:570
#define HWY_RVV_COUNT_TRUE(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:877
#define HWY_RVV_RETV_ARGVS(BASE, CHAR, SEW, LMUL, X2, HALF, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:233
#define HWY_LANES(T)
Definition: set_macros-inl.h:80
#define HWY_NAMESPACE
Definition: set_macros-inl.h:77
Definition: shared-inl.h:35