19 #include <riscv_vector.h>
33 using DFromV =
typename DFromV_t<RemoveConst<V>>::type;
36 using TFromV = TFromD<DFromV<V>>;
39 #define HWY_RVV_IF_POW2_IN(D, min, max) \
40 hwy::EnableIf<(min) <= Pow2(D()) && Pow2(D()) <= (max)>* = nullptr
42 template <
typename T,
size_t N,
int kPow2>
59 #define HWY_RVV_FOREACH_B(X_MACRO, NAME, OP) \
60 X_MACRO(64, 0, 64, NAME, OP) \
61 X_MACRO(32, 0, 32, NAME, OP) \
62 X_MACRO(16, 0, 16, NAME, OP) \
63 X_MACRO(8, 0, 8, NAME, OP) \
64 X_MACRO(8, 1, 4, NAME, OP) \
65 X_MACRO(8, 2, 2, NAME, OP) \
66 X_MACRO(8, 3, 1, NAME, OP)
78 #define HWY_RVV_FOREACH_08_TRUNC(X_MACRO, BASE, CHAR, NAME, OP) \
79 X_MACRO(BASE, CHAR, 8, 16, __, mf4, mf2, mf8, -2, 32, NAME, OP) \
80 X_MACRO(BASE, CHAR, 8, 16, __, mf2, m1, mf4, -1, 16, NAME, OP) \
81 X_MACRO(BASE, CHAR, 8, 16, __, m1, m2, mf2, 0, 8, NAME, OP) \
82 X_MACRO(BASE, CHAR, 8, 16, __, m2, m4, m1, 1, 4, NAME, OP) \
83 X_MACRO(BASE, CHAR, 8, 16, __, m4, m8, m2, 2, 2, NAME, OP) \
84 X_MACRO(BASE, CHAR, 8, 16, __, m8, __, m4, 3, 1, NAME, OP)
86 #define HWY_RVV_FOREACH_16_TRUNC(X_MACRO, BASE, CHAR, NAME, OP) \
87 X_MACRO(BASE, CHAR, 16, 32, 8, mf2, m1, mf4, -1, 32, NAME, OP) \
88 X_MACRO(BASE, CHAR, 16, 32, 8, m1, m2, mf2, 0, 16, NAME, OP) \
89 X_MACRO(BASE, CHAR, 16, 32, 8, m2, m4, m1, 1, 8, NAME, OP) \
90 X_MACRO(BASE, CHAR, 16, 32, 8, m4, m8, m2, 2, 4, NAME, OP) \
91 X_MACRO(BASE, CHAR, 16, 32, 8, m8, __, m4, 3, 2, NAME, OP)
93 #define HWY_RVV_FOREACH_32_TRUNC(X_MACRO, BASE, CHAR, NAME, OP) \
94 X_MACRO(BASE, CHAR, 32, 64, 16, m1, m2, mf2, 0, 32, NAME, OP) \
95 X_MACRO(BASE, CHAR, 32, 64, 16, m2, m4, m1, 1, 16, NAME, OP) \
96 X_MACRO(BASE, CHAR, 32, 64, 16, m4, m8, m2, 2, 8, NAME, OP) \
97 X_MACRO(BASE, CHAR, 32, 64, 16, m8, __, m4, 3, 4, NAME, OP)
99 #define HWY_RVV_FOREACH_64_TRUNC(X_MACRO, BASE, CHAR, NAME, OP) \
100 X_MACRO(BASE, CHAR, 64, __, 32, m2, m4, m1, 1, 32, NAME, OP) \
101 X_MACRO(BASE, CHAR, 64, __, 32, m4, m8, m2, 2, 16, NAME, OP) \
102 X_MACRO(BASE, CHAR, 64, __, 32, m8, __, m4, 3, 8, NAME, OP)
105 #define HWY_RVV_FOREACH_08_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
106 X_MACRO(BASE, CHAR, 8, 16, __, mf4, mf2, mf8, -2, 32, NAME, OP) \
107 X_MACRO(BASE, CHAR, 8, 16, __, mf2, m1, mf4, -1, 16, NAME, OP) \
108 X_MACRO(BASE, CHAR, 8, 16, __, m1, m2, mf2, 0, 8, NAME, OP) \
109 X_MACRO(BASE, CHAR, 8, 16, __, m2, m4, m1, 1, 4, NAME, OP) \
110 X_MACRO(BASE, CHAR, 8, 16, __, m4, m8, m2, 2, 2, NAME, OP) \
111 X_MACRO(BASE, CHAR, 8, 16, __, m8, __, m4, 3, 1, NAME, OP)
113 #define HWY_RVV_FOREACH_16_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
114 X_MACRO(BASE, CHAR, 16, 32, 8, mf4, mf2, mf8, -2, 64, NAME, OP) \
115 X_MACRO(BASE, CHAR, 16, 32, 8, mf2, m1, mf4, -1, 32, NAME, OP) \
116 X_MACRO(BASE, CHAR, 16, 32, 8, m1, m2, mf2, 0, 16, NAME, OP) \
117 X_MACRO(BASE, CHAR, 16, 32, 8, m2, m4, m1, 1, 8, NAME, OP) \
118 X_MACRO(BASE, CHAR, 16, 32, 8, m4, m8, m2, 2, 4, NAME, OP) \
119 X_MACRO(BASE, CHAR, 16, 32, 8, m8, __, m4, 3, 2, NAME, OP)
121 #define HWY_RVV_FOREACH_32_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
122 X_MACRO(BASE, CHAR, 32, 64, 16, mf2, m1, mf4, -1, 64, NAME, OP) \
123 X_MACRO(BASE, CHAR, 32, 64, 16, m1, m2, mf2, 0, 32, NAME, OP) \
124 X_MACRO(BASE, CHAR, 32, 64, 16, m2, m4, m1, 1, 16, NAME, OP) \
125 X_MACRO(BASE, CHAR, 32, 64, 16, m4, m8, m2, 2, 8, NAME, OP) \
126 X_MACRO(BASE, CHAR, 32, 64, 16, m8, __, m4, 3, 4, NAME, OP)
128 #define HWY_RVV_FOREACH_64_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
129 X_MACRO(BASE, CHAR, 64, __, 32, m1, m2, mf2, 0, 64, NAME, OP) \
130 X_MACRO(BASE, CHAR, 64, __, 32, m2, m4, m1, 1, 32, NAME, OP) \
131 X_MACRO(BASE, CHAR, 64, __, 32, m4, m8, m2, 2, 16, NAME, OP) \
132 X_MACRO(BASE, CHAR, 64, __, 32, m8, __, m4, 3, 8, NAME, OP)
135 #define HWY_RVV_FOREACH_08_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
136 X_MACRO(BASE, CHAR, 8, 16, __, mf8, mf4, __, -3, 64, NAME, OP) \
137 X_MACRO(BASE, CHAR, 8, 16, __, mf4, mf2, mf8, -2, 32, NAME, OP) \
138 X_MACRO(BASE, CHAR, 8, 16, __, mf2, m1, mf4, -1, 16, NAME, OP) \
139 X_MACRO(BASE, CHAR, 8, 16, __, m1, m2, mf2, 0, 8, NAME, OP) \
140 X_MACRO(BASE, CHAR, 8, 16, __, m2, m4, m1, 1, 4, NAME, OP) \
141 X_MACRO(BASE, CHAR, 8, 16, __, m4, m8, m2, 2, 2, NAME, OP)
143 #define HWY_RVV_FOREACH_16_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
144 X_MACRO(BASE, CHAR, 16, 32, 8, mf4, mf2, mf8, -2, 64, NAME, OP) \
145 X_MACRO(BASE, CHAR, 16, 32, 8, mf2, m1, mf4, -1, 32, NAME, OP) \
146 X_MACRO(BASE, CHAR, 16, 32, 8, m1, m2, mf2, 0, 16, NAME, OP) \
147 X_MACRO(BASE, CHAR, 16, 32, 8, m2, m4, m1, 1, 8, NAME, OP) \
148 X_MACRO(BASE, CHAR, 16, 32, 8, m4, m8, m2, 2, 4, NAME, OP)
150 #define HWY_RVV_FOREACH_32_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
151 X_MACRO(BASE, CHAR, 32, 64, 16, mf2, m1, mf4, -1, 64, NAME, OP) \
152 X_MACRO(BASE, CHAR, 32, 64, 16, m1, m2, mf2, 0, 32, NAME, OP) \
153 X_MACRO(BASE, CHAR, 32, 64, 16, m2, m4, m1, 1, 16, NAME, OP) \
154 X_MACRO(BASE, CHAR, 32, 64, 16, m4, m8, m2, 2, 8, NAME, OP)
156 #define HWY_RVV_FOREACH_64_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
157 X_MACRO(BASE, CHAR, 64, __, 32, m1, m2, mf2, 0, 64, NAME, OP) \
158 X_MACRO(BASE, CHAR, 64, __, 32, m2, m4, m1, 1, 32, NAME, OP) \
159 X_MACRO(BASE, CHAR, 64, __, 32, m4, m8, m2, 2, 16, NAME, OP)
162 #define HWY_RVV_FOREACH_08_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
163 HWY_RVV_FOREACH_08_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
164 X_MACRO(BASE, CHAR, 8, 16, __, m8, __, m4, 3, 1, NAME, OP)
166 #define HWY_RVV_FOREACH_16_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
167 HWY_RVV_FOREACH_16_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
168 X_MACRO(BASE, CHAR, 16, 32, 8, m8, __, m4, 3, 2, NAME, OP)
170 #define HWY_RVV_FOREACH_32_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
171 HWY_RVV_FOREACH_32_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
172 X_MACRO(BASE, CHAR, 32, 64, 16, m8, __, m4, 3, 4, NAME, OP)
174 #define HWY_RVV_FOREACH_64_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
175 HWY_RVV_FOREACH_64_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
176 X_MACRO(BASE, CHAR, 64, __, 32, m8, __, m4, 3, 8, NAME, OP)
194 #define HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
196 #define HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
197 X_MACRO(BASE, CHAR, 16, 32, 8, mf4, mf2, mf8, -3, 64, NAME, OP)
199 #define HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
200 X_MACRO(BASE, CHAR, 32, 64, 16, mf2, m1, mf4, -2, 64, NAME, OP)
202 #define HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
203 X_MACRO(BASE, CHAR, 64, __, 32, m1, m2, mf2, -1, 64, NAME, OP)
206 #define HWY_RVV_FOREACH_08_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
207 HWY_RVV_FOREACH_08_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
208 HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
210 #define HWY_RVV_FOREACH_16_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
211 HWY_RVV_FOREACH_16_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
212 HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
214 #define HWY_RVV_FOREACH_32_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
215 HWY_RVV_FOREACH_32_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
216 HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
218 #define HWY_RVV_FOREACH_64_ALL_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
219 HWY_RVV_FOREACH_64_ALL(X_MACRO, BASE, CHAR, NAME, OP) \
220 HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
223 #define HWY_RVV_FOREACH_08_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
224 HWY_RVV_FOREACH_08_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
225 HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
227 #define HWY_RVV_FOREACH_16_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
228 HWY_RVV_FOREACH_16_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
229 HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
231 #define HWY_RVV_FOREACH_32_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
232 HWY_RVV_FOREACH_32_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
233 HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
235 #define HWY_RVV_FOREACH_64_EXT_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
236 HWY_RVV_FOREACH_64_EXT(X_MACRO, BASE, CHAR, NAME, OP) \
237 HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
240 #define HWY_RVV_FOREACH_08_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
241 HWY_RVV_FOREACH_08_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
242 HWY_RVV_FOREACH_08_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
244 #define HWY_RVV_FOREACH_16_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
245 HWY_RVV_FOREACH_16_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
246 HWY_RVV_FOREACH_16_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
248 #define HWY_RVV_FOREACH_32_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
249 HWY_RVV_FOREACH_32_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
250 HWY_RVV_FOREACH_32_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
252 #define HWY_RVV_FOREACH_64_DEMOTE_VIRT(X_MACRO, BASE, CHAR, NAME, OP) \
253 HWY_RVV_FOREACH_64_DEMOTE(X_MACRO, BASE, CHAR, NAME, OP) \
254 HWY_RVV_FOREACH_64_VIRT(X_MACRO, BASE, CHAR, NAME, OP)
257 #define HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS) \
258 HWY_CONCAT(HWY_RVV_FOREACH_08, LMULS)(X_MACRO, uint, u, NAME, OP)
259 #define HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
260 HWY_CONCAT(HWY_RVV_FOREACH_16, LMULS)(X_MACRO, uint, u, NAME, OP)
261 #define HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
262 HWY_CONCAT(HWY_RVV_FOREACH_32, LMULS)(X_MACRO, uint, u, NAME, OP)
263 #define HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS) \
264 HWY_CONCAT(HWY_RVV_FOREACH_64, LMULS)(X_MACRO, uint, u, NAME, OP)
267 #define HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS) \
268 HWY_CONCAT(HWY_RVV_FOREACH_08, LMULS)(X_MACRO, int, i, NAME, OP)
269 #define HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS) \
270 HWY_CONCAT(HWY_RVV_FOREACH_16, LMULS)(X_MACRO, int, i, NAME, OP)
271 #define HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS) \
272 HWY_CONCAT(HWY_RVV_FOREACH_32, LMULS)(X_MACRO, int, i, NAME, OP)
273 #define HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS) \
274 HWY_CONCAT(HWY_RVV_FOREACH_64, LMULS)(X_MACRO, int, i, NAME, OP)
278 #define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS) \
279 HWY_CONCAT(HWY_RVV_FOREACH_16, LMULS)(X_MACRO, float, f, NAME, OP)
281 #define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS)
283 #define HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP, LMULS) \
284 HWY_CONCAT(HWY_RVV_FOREACH_32, LMULS)(X_MACRO, float, f, NAME, OP)
285 #define HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP, LMULS) \
286 HWY_CONCAT(HWY_RVV_FOREACH_64, LMULS)(X_MACRO, float, f, NAME, OP)
289 #define HWY_RVV_FOREACH_UI08(X_MACRO, NAME, OP, LMULS) \
290 HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS) \
291 HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS)
293 #define HWY_RVV_FOREACH_UI16(X_MACRO, NAME, OP, LMULS) \
294 HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
295 HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS)
297 #define HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP, LMULS) \
298 HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
299 HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS)
301 #define HWY_RVV_FOREACH_UI64(X_MACRO, NAME, OP, LMULS) \
302 HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS) \
303 HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS)
305 #define HWY_RVV_FOREACH_UI3264(X_MACRO, NAME, OP, LMULS) \
306 HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP, LMULS) \
307 HWY_RVV_FOREACH_UI64(X_MACRO, NAME, OP, LMULS)
309 #define HWY_RVV_FOREACH_U163264(X_MACRO, NAME, OP, LMULS) \
310 HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
311 HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
312 HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS)
314 #define HWY_RVV_FOREACH_I163264(X_MACRO, NAME, OP, LMULS) \
315 HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS) \
316 HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS) \
317 HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS)
319 #define HWY_RVV_FOREACH_UI163264(X_MACRO, NAME, OP, LMULS) \
320 HWY_RVV_FOREACH_U163264(X_MACRO, NAME, OP, LMULS) \
321 HWY_RVV_FOREACH_I163264(X_MACRO, NAME, OP, LMULS)
323 #define HWY_RVV_FOREACH_F3264(X_MACRO, NAME, OP, LMULS) \
324 HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP, LMULS) \
325 HWY_RVV_FOREACH_F64(X_MACRO, NAME, OP, LMULS)
328 #define HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS) \
329 HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS) \
330 HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS) \
331 HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS) \
332 HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS)
334 #define HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS) \
335 HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS) \
336 HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS) \
337 HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS) \
338 HWY_RVV_FOREACH_I64(X_MACRO, NAME, OP, LMULS)
340 #define HWY_RVV_FOREACH_F(X_MACRO, NAME, OP, LMULS) \
341 HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS) \
342 HWY_RVV_FOREACH_F3264(X_MACRO, NAME, OP, LMULS)
345 #define HWY_RVV_FOREACH_UI(X_MACRO, NAME, OP, LMULS) \
346 HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS) \
347 HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS)
349 #define HWY_RVV_FOREACH(X_MACRO, NAME, OP, LMULS) \
350 HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS) \
351 HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS) \
352 HWY_RVV_FOREACH_F(X_MACRO, NAME, OP, LMULS)
355 #define HWY_RVV_T(BASE, SEW) BASE##SEW##_t
356 #define HWY_RVV_D(BASE, SEW, N, SHIFT) Simd<HWY_RVV_T(BASE, SEW), N, SHIFT>
357 #define HWY_RVV_V(BASE, SEW, LMUL) v##BASE##SEW##LMUL##_t
358 #define HWY_RVV_M(MLEN) vbool##MLEN##_t
364 #define HWY_SPECIALIZE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
367 struct DFromV_t<HWY_RVV_V(BASE, SEW, LMUL)> { \
368 using Lane = HWY_RVV_T(BASE, SEW); \
369 using type = ScalableTag<Lane, SHIFT>; \
373 #undef HWY_SPECIALIZE
379 #define HWY_RVV_LANES(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
381 template <size_t N> \
382 HWY_API size_t NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d) { \
383 size_t actual = v##OP##SEW##LMUL(); \
386 if (detail::IsFull(d)) return actual; \
390 if (detail::ScaleByPower(128 / SEW, SHIFT) == 1) actual >>= 1; \
391 return HWY_MIN(actual, N); \
397 template <
size_t N,
int kPow2>
406 #define HWY_RVV_AVL(SEW, SHIFT) \
407 Lanes(ScalableTag<HWY_RVV_T(uint, SEW), SHIFT>())
410 #define HWY_RVV_RETV_ARGV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
411 SHIFT, MLEN, NAME, OP) \
412 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
413 return v##OP##_v_##CHAR##SEW##LMUL(v, HWY_RVV_AVL(SEW, SHIFT)); \
417 #define HWY_RVV_RETV_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
418 SHIFT, MLEN, NAME, OP) \
419 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
420 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_T(BASE, SEW) b) { \
421 return v##OP##_##CHAR##SEW##LMUL(a, b, HWY_RVV_AVL(SEW, SHIFT)); \
425 #define HWY_RVV_RETV_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
426 SHIFT, MLEN, NAME, OP) \
427 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
428 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) { \
429 return v##OP##_vv_##CHAR##SEW##LMUL(a, b, HWY_RVV_AVL(SEW, SHIFT)); \
436 #define HWY_RVV_SET(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
438 template <size_t N> \
439 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
440 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_T(BASE, SEW) arg) { \
441 return v##OP##_##CHAR##SEW##LMUL(arg, Lanes(d)); \
450 template <
size_t N,
int kPow2>
457 using VFromD = decltype(
Set(D(), TFromD<D>()));
461 template <
typename T,
size_t N,
int kPow2>
472 #define HWY_RVV_UNDEFINED(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
473 SHIFT, MLEN, NAME, OP) \
474 template <size_t N> \
475 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
476 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) ) { \
477 return v##OP##_##CHAR##SEW##LMUL(); \
481 #undef HWY_RVV_UNDEFINED
494 #define HWY_RVV_TRUNC(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
496 HWY_API HWY_RVV_V(BASE, SEW, LMULH) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
497 return v##OP##_v_##CHAR##SEW##LMUL##_##CHAR##SEW##LMULH(v); \
503 #define HWY_RVV_EXT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
505 template <size_t N> \
506 HWY_API HWY_RVV_V(BASE, SEW, LMULD) \
507 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT + 1) , \
508 HWY_RVV_V(BASE, SEW, LMUL) v) { \
509 return v##OP##_v_##CHAR##SEW##LMUL##_##CHAR##SEW##LMULD(v); \
516 #define HWY_RVV_EXT_VIRT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
517 SHIFT, MLEN, NAME, OP) \
518 template <size_t N> \
519 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
520 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT + 1) , \
521 HWY_RVV_V(BASE, SEW, LMUL) v) { \
525 #undef HWY_RVV_EXT_VIRT
531 #define HWY_RVV_CAST_U8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
532 SHIFT, MLEN, NAME, OP) \
533 template <typename T, size_t N> \
534 HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> , \
535 vuint8##LMUL##_t v) { \
538 template <size_t N> \
539 HWY_API vuint8##LMUL##_t BitCastFromByte( \
540 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMUL##_t v) { \
545 #define HWY_RVV_CAST_I8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
546 SHIFT, MLEN, NAME, OP) \
547 template <typename T, size_t N> \
548 HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> , \
549 vint8##LMUL##_t v) { \
550 return vreinterpret_v_i8##LMUL##_u8##LMUL(v); \
552 template <size_t N> \
553 HWY_API vint8##LMUL##_t BitCastFromByte( \
554 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMUL##_t v) { \
555 return vreinterpret_v_u8##LMUL##_i8##LMUL(v); \
560 #define HWY_RVV_CAST_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
562 template <typename T, size_t N> \
563 HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> , \
564 HWY_RVV_V(BASE, SEW, LMUL) v) { \
565 return v##OP##_v_##CHAR##SEW##LMUL##_u8##LMUL(v); \
567 template <size_t N> \
568 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
569 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMUL##_t v) { \
570 return v##OP##_v_u8##LMUL##_##CHAR##SEW##LMUL(v); \
574 #define HWY_RVV_CAST_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
575 SHIFT, MLEN, NAME, OP) \
576 template <typename T, size_t N> \
577 HWY_API vuint8##LMUL##_t BitCastToByte(Simd<T, N, SHIFT> , \
578 HWY_RVV_V(BASE, SEW, LMUL) v) { \
579 return v##OP##_v_u##SEW##LMUL##_u8##LMUL( \
580 v##OP##_v_##CHAR##SEW##LMUL##_u##SEW##LMUL(v)); \
582 template <size_t N> \
583 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
584 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMUL##_t v) { \
585 return v##OP##_v_u##SEW##LMUL##_##CHAR##SEW##LMUL( \
586 v##OP##_v_u8##LMUL##_u##SEW##LMUL(v)); \
590 #define HWY_RVV_CAST_VIRT_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
591 SHIFT, MLEN, NAME, OP) \
592 template <typename T, size_t N> \
593 HWY_API vuint8##LMULH##_t BitCastToByte(Simd<T, N, SHIFT> , \
594 HWY_RVV_V(BASE, SEW, LMUL) v) { \
595 return detail::Trunc(v##OP##_v_##CHAR##SEW##LMUL##_u8##LMUL(v)); \
597 template <size_t N> \
598 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
599 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMULH##_t v) { \
600 HWY_RVV_D(uint, 8, N, SHIFT + 1) d2; \
601 const vuint8##LMUL##_t v2 = detail::Ext(d2, v); \
602 return v##OP##_v_u8##LMUL##_##CHAR##SEW##LMUL(v2); \
606 #define HWY_RVV_CAST_VIRT_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
607 SHIFT, MLEN, NAME, OP) \
608 template <typename T, size_t N> \
609 HWY_API vuint8##LMULH##_t BitCastToByte(Simd<T, N, SHIFT> , \
610 HWY_RVV_V(BASE, SEW, LMUL) v) { \
611 return detail::Trunc(v##OP##_v_u##SEW##LMUL##_u8##LMUL( \
612 v##OP##_v_##CHAR##SEW##LMUL##_u##SEW##LMUL(v))); \
614 template <size_t N> \
615 HWY_API HWY_RVV_V(BASE, SEW, LMUL) BitCastFromByte( \
616 HWY_RVV_D(BASE, SEW, N, SHIFT) , vuint8##LMULH##_t v) { \
617 HWY_RVV_D(uint, 8, N, SHIFT + 1) d2; \
618 const vuint8##LMUL##_t v2 = detail::Ext(d2, v); \
619 return v##OP##_v_u##SEW##LMUL##_##CHAR##SEW##LMUL( \
620 v##OP##_v_u8##LMUL##_u##SEW##LMUL(v2)); \
632 #undef HWY_RVV_CAST_U8
633 #undef HWY_RVV_CAST_I8
634 #undef HWY_RVV_CAST_U
635 #undef HWY_RVV_CAST_IF
636 #undef HWY_RVV_CAST_VIRT_U
637 #undef HWY_RVV_CAST_VIRT_IF
639 template <
size_t N,
int kPow2>
647 template <
class D,
class FromV>
654 template <
class V,
class DU = RebindToUn
signed<DFromV<V>>>
665 #define HWY_RVV_IOTA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
667 template <size_t N> \
668 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d) { \
669 return v##OP##_##CHAR##SEW##LMUL(Lanes(d)); \
675 template <
class D,
class DU = RebindToUn
signed<D>>
688 template <
class V, HWY_IF_FLOAT_V(V)>
704 template <
class V, HWY_IF_FLOAT_V(V)>
706 using DF = DFromV<V>;
707 using DU = RebindToUnsigned<DF>;
715 template <
class V, HWY_IF_FLOAT_V(V)>
717 using DF = DFromV<V>;
718 using DU = RebindToUnsigned<DF>;
731 template <
class V, HWY_IF_FLOAT_V(V)>
733 using DF = DFromV<V>;
734 using DU = RebindToUnsigned<DF>;
742 return And(
Not(not_a), b);
749 return Or(o,
And(a1, a2));
805 #define HWY_RVV_SHIFT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
807 template <int kBits> \
808 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
809 return v##OP##_vx_##CHAR##SEW##LMUL(v, kBits, HWY_RVV_AVL(SEW, SHIFT)); \
811 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
812 NAME##Same(HWY_RVV_V(BASE, SEW, LMUL) v, int bits) { \
813 return v##OP##_vx_##CHAR##SEW##LMUL(v, static_cast<uint8_t>(bits), \
814 HWY_RVV_AVL(SEW, SHIFT)); \
833 using VU16 =
VFromD<decltype(du16)>;
835 const VU16 vFDB97531 = ShiftRight<8>(
BitCast(du16,
v));
836 const VU16 vECA86420 = detail::AndS(
BitCast(du16,
v), 0xFF);
837 const VU16 sFE_DC_BA_98_76_54_32_10 =
Add(vFDB97531, vECA86420);
839 const VU16 szz_FE_zz_BA_zz_76_zz_32 =
840 BitCast(du16, ShiftRight<16>(
BitCast(du32, sFE_DC_BA_98_76_54_32_10)));
841 const VU16 sxx_FC_xx_B8_xx_74_xx_30 =
842 Add(sFE_DC_BA_98_76_54_32_10, szz_FE_zz_BA_zz_76_zz_32);
843 const VU16 szz_zz_xx_FC_zz_zz_xx_74 =
844 BitCast(du16, ShiftRight<32>(
BitCast(du64, sxx_FC_xx_B8_xx_74_xx_30)));
845 const VU16 sxx_xx_xx_F8_xx_xx_xx_70 =
846 Add(sxx_FC_xx_B8_xx_74_xx_30, szz_zz_xx_FC_zz_zz_xx_74);
847 return detail::AndS(
BitCast(du64, sxx_xx_xx_F8_xx_xx_xx_70), 0xFFFFull);
851 template <
int kBits,
class V>
853 constexpr
size_t kSizeInBits =
sizeof(TFromV<V>) * 8;
854 static_assert(0 <= kBits && kBits < kSizeInBits,
"Invalid shift count");
855 if (kBits == 0)
return v;
856 return Or(ShiftRight<kBits>(
v), ShiftLeft<kSizeInBits - kBits>(
v));
860 #define HWY_RVV_SHIFT_VV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
861 SHIFT, MLEN, NAME, OP) \
862 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
863 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, LMUL) bits) { \
864 return v##OP##_vv_##CHAR##SEW##LMUL(v, bits, HWY_RVV_AVL(SEW, SHIFT)); \
869 #define HWY_RVV_SHIFT_II(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
870 SHIFT, MLEN, NAME, OP) \
871 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
872 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, LMUL) bits) { \
873 return v##OP##_vv_##CHAR##SEW##LMUL(v, detail::BitCastToUnsigned(bits), \
874 HWY_RVV_AVL(SEW, SHIFT)); \
884 #undef HWY_RVV_SHIFT_II
885 #undef HWY_RVV_SHIFT_VV
949 #define HWY_RVV_FMA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
951 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
952 NAME(HWY_RVV_V(BASE, SEW, LMUL) mul, HWY_RVV_V(BASE, SEW, LMUL) x, \
953 HWY_RVV_V(BASE, SEW, LMUL) add) { \
954 return v##OP##_vv_##CHAR##SEW##LMUL(add, mul, x, HWY_RVV_AVL(SEW, SHIFT)); \
977 #define HWY_RVV_RETM_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
978 SHIFT, MLEN, NAME, OP) \
979 HWY_API HWY_RVV_M(MLEN) \
980 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_V(BASE, SEW, LMUL) b) { \
981 return v##OP##_vv_##CHAR##SEW##LMUL##_b##MLEN(a, b, \
982 HWY_RVV_AVL(SEW, SHIFT)); \
986 #define HWY_RVV_RETM_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
987 SHIFT, MLEN, NAME, OP) \
988 HWY_API HWY_RVV_M(MLEN) \
989 NAME(HWY_RVV_V(BASE, SEW, LMUL) a, HWY_RVV_T(BASE, SEW) b) { \
990 return v##OP##_##CHAR##SEW##LMUL##_b##MLEN(a, b, HWY_RVV_AVL(SEW, SHIFT)); \
1025 #undef HWY_RVV_RETM_ARGVV
1026 #undef HWY_RVV_RETM_ARGVS
1031 HWY_API auto Ge(
const V a,
const V b) -> decltype(
Le(a, b)) {
1036 HWY_API auto Gt(
const V a,
const V b) -> decltype(
Lt(a, b)) {
1043 return detail::NeS(
And(a, bit), 0);
1049 #define HWY_RVV_RETM_ARGM(SEW, SHIFT, MLEN, NAME, OP) \
1050 HWY_API HWY_RVV_M(MLEN) NAME(HWY_RVV_M(MLEN) m) { \
1051 return vm##OP##_m_b##MLEN(m, ~0ull); \
1056 #undef HWY_RVV_RETM_ARGM
1061 #define HWY_RVV_RETM_ARGMM(SEW, SHIFT, MLEN, NAME, OP) \
1062 HWY_API HWY_RVV_M(MLEN) NAME(HWY_RVV_M(MLEN) a, HWY_RVV_M(MLEN) b) { \
1063 return vm##OP##_mm_b##MLEN(b, a, HWY_RVV_AVL(SEW, SHIFT)); \
1077 #undef HWY_RVV_RETM_ARGMM
1080 #define HWY_RVV_IF_THEN_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1081 SHIFT, MLEN, NAME, OP) \
1082 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1083 NAME(HWY_RVV_M(MLEN) m, HWY_RVV_V(BASE, SEW, LMUL) yes, \
1084 HWY_RVV_V(BASE, SEW, LMUL) no) { \
1085 return v##OP##_vvm_##CHAR##SEW##LMUL(m, no, yes, HWY_RVV_AVL(SEW, SHIFT)); \
1090 #undef HWY_RVV_IF_THEN_ELSE
1093 template <
class M,
class V>
1100 #define HWY_RVV_IF_THEN_ZERO_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, \
1101 LMULH, SHIFT, MLEN, NAME, OP) \
1102 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1103 NAME(HWY_RVV_M(MLEN) m, HWY_RVV_V(BASE, SEW, LMUL) no) { \
1104 return v##OP##_##CHAR##SEW##LMUL(m, no, 0, HWY_RVV_AVL(SEW, SHIFT)); \
1110 #undef HWY_RVV_IF_THEN_ZERO_ELSE
1116 return detail::NeS(
v, 0);
1122 template <
class D,
typename MFrom>
1132 #define HWY_RVV_VEC_FROM_MASK(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1133 SHIFT, MLEN, NAME, OP) \
1134 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1135 NAME(HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_M(MLEN) m) { \
1136 return v##OP##_##CHAR##SEW##LMUL##_m(m, v0, v0, 1, \
1137 HWY_RVV_AVL(SEW, SHIFT)); \
1141 #undef HWY_RVV_VEC_FROM_MASK
1144 template <
class D, HWY_IF_NOT_FLOAT_D(D)>
1146 return detail::SubS(
Zero(
d), mask);
1149 template <
class D, HWY_IF_FLOAT_D(D)>
1176 static_assert(
IsSigned<TFromV<V>>(),
"Only works for signed/float");
1187 #define HWY_RVV_FIND_FIRST_TRUE(SEW, SHIFT, MLEN, NAME, OP) \
1188 template <class D> \
1189 HWY_API intptr_t FindFirstTrue(D d, HWY_RVV_M(MLEN) m) { \
1190 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
1191 return vfirst_m_b##MLEN(m, Lanes(d)); \
1195 #undef HWY_RVV_FIND_FIRST_TRUE
1205 #define HWY_RVV_ALL_TRUE(SEW, SHIFT, MLEN, NAME, OP) \
1206 template <class D> \
1207 HWY_API bool AllTrue(D d, HWY_RVV_M(MLEN) m) { \
1208 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
1209 return AllFalse(d, vmnot_m_b##MLEN(m, Lanes(d))); \
1213 #undef HWY_RVV_ALL_TRUE
1217 #define HWY_RVV_COUNT_TRUE(SEW, SHIFT, MLEN, NAME, OP) \
1218 template <class D> \
1219 HWY_API size_t CountTrue(D d, HWY_RVV_M(MLEN) m) { \
1220 static_assert(MLenFromD(d) == MLEN, "Type mismatch"); \
1221 return vcpop_m_b##MLEN(m, Lanes(d)); \
1225 #undef HWY_RVV_COUNT_TRUE
1231 #define HWY_RVV_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1233 template <size_t N> \
1234 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1235 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1236 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1237 return v##OP##SEW##_v_##CHAR##SEW##LMUL(p, Lanes(d)); \
1243 template <
size_t N,
int kPow2>
1250 template <
size_t N,
int kPow2>
1267 #define HWY_RVV_MASKED_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1268 SHIFT, MLEN, NAME, OP) \
1269 template <size_t N> \
1270 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1271 NAME(HWY_RVV_M(MLEN) m, HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1272 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1273 return v##OP##SEW##_v_##CHAR##SEW##LMUL##_m(m, Zero(d), p, Lanes(d)); \
1276 #undef HWY_RVV_MASKED_LOAD
1280 #define HWY_RVV_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1282 template <size_t N> \
1283 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, \
1284 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1285 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1286 return v##OP##SEW##_v_##CHAR##SEW##LMUL(p, v, Lanes(d)); \
1289 #undef HWY_RVV_STORE
1293 #define HWY_RVV_BLENDED_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1294 SHIFT, MLEN, NAME, OP) \
1295 template <size_t N> \
1296 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_M(MLEN) m, \
1297 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1298 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1299 return v##OP##SEW##_v_##CHAR##SEW##LMUL##_m(m, p, v, Lanes(d)); \
1302 #undef HWY_RVV_BLENDED_STORE
1306 #define HWY_RVV_STOREN(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1308 template <size_t N> \
1309 HWY_API void NAME(size_t count, HWY_RVV_V(BASE, SEW, LMUL) v, \
1310 HWY_RVV_D(BASE, SEW, N, SHIFT) , \
1311 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT p) { \
1312 return v##OP##SEW##_v_##CHAR##SEW##LMUL(p, v, count); \
1315 #undef HWY_RVV_STOREN
1322 template <
class V,
class D>
1328 template <
class V,
class D,
typename T>
1335 #define HWY_RVV_SCATTER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1336 SHIFT, MLEN, NAME, OP) \
1337 template <size_t N> \
1338 HWY_API void NAME(HWY_RVV_V(BASE, SEW, LMUL) v, \
1339 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1340 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base, \
1341 HWY_RVV_V(int, SEW, LMUL) offset) { \
1342 return v##OP##ei##SEW##_v_##CHAR##SEW##LMUL( \
1343 base, detail::BitCastToUnsigned(offset), v, Lanes(d)); \
1346 #undef HWY_RVV_SCATTER
1350 template <
class D, HWY_IF_LANE_SIZE_D(D, 4)>
1356 template <
class D, HWY_IF_LANE_SIZE_D(D, 8)>
1358 const VFromD<RebindToSigned<D>> index) {
1364 #define HWY_RVV_GATHER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1366 template <size_t N> \
1367 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1368 NAME(HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1369 const HWY_RVV_T(BASE, SEW) * HWY_RESTRICT base, \
1370 HWY_RVV_V(int, SEW, LMUL) offset) { \
1371 return v##OP##ei##SEW##_v_##CHAR##SEW##LMUL( \
1372 base, detail::BitCastToUnsigned(offset), Lanes(d)); \
1375 #undef HWY_RVV_GATHER
1379 template <
class D, HWY_IF_LANE_SIZE_D(D, 4)>
1385 template <
class D, HWY_IF_LANE_SIZE_D(D, 8)>
1387 const VFromD<RebindToSigned<D>> index) {
1393 #define HWY_RVV_STORE3(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1395 template <size_t N> \
1396 HWY_API void NAME( \
1397 HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1, \
1398 HWY_RVV_V(BASE, SEW, LMUL) v2, HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1399 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT unaligned) { \
1400 return v##OP##e8_v_##CHAR##SEW##LMUL(unaligned, v0, v1, v2, Lanes(d)); \
1403 HWY_RVV_STORE3(uint,
u, 8,
_,
_,
mf8,
_,
_, -3, 64,
1405 HWY_RVV_STORE3(uint,
u, 8,
_,
_,
mf4,
_,
_, -2, 32,
1407 HWY_RVV_STORE3(uint,
u, 8,
_,
_,
mf2,
_,
_, -1, 16,
1409 HWY_RVV_STORE3(uint,
u, 8,
_,
_,
m1,
_,
_, 0, 8,
StoreInterleaved3,
1411 HWY_RVV_STORE3(uint,
u, 8,
_,
_,
m2,
_,
_, 1, 4,
StoreInterleaved3,
1413 #undef HWY_RVV_STORE3
1417 #define HWY_RVV_STORE4(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1419 template <size_t N> \
1420 HWY_API void NAME( \
1421 HWY_RVV_V(BASE, SEW, LMUL) v0, HWY_RVV_V(BASE, SEW, LMUL) v1, \
1422 HWY_RVV_V(BASE, SEW, LMUL) v2, HWY_RVV_V(BASE, SEW, LMUL) v3, \
1423 HWY_RVV_D(BASE, SEW, N, SHIFT) d, \
1424 HWY_RVV_T(BASE, SEW) * HWY_RESTRICT aligned) { \
1425 return v##OP##e8_v_##CHAR##SEW##LMUL(aligned, v0, v1, v2, v3, Lanes(d)); \
1428 HWY_RVV_STORE4(uint,
u, 8,
_,
_,
mf8,
_,
_, -3, 64,
1430 HWY_RVV_STORE4(uint,
u, 8,
_,
_,
mf4,
_,
_, -2, 32,
1432 HWY_RVV_STORE4(uint,
u, 8,
_,
_,
mf2,
_,
_, -1, 16,
1434 HWY_RVV_STORE4(uint,
u, 8,
_,
_,
m1,
_,
_, 0, 8,
StoreInterleaved4,
1436 HWY_RVV_STORE4(uint,
u, 8,
_,
_,
m2,
_,
_, 1, 4,
StoreInterleaved4,
1439 #undef HWY_RVV_STORE4
1446 #define HWY_RVV_PROMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1447 SHIFT, MLEN, NAME, OP) \
1448 template <size_t N> \
1449 HWY_API HWY_RVV_V(BASE, SEWD, LMULD) NAME( \
1450 HWY_RVV_D(BASE, SEWD, N, SHIFT + 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1451 return OP##CHAR##SEWD##LMULD(v, Lanes(d)); \
1462 #undef HWY_RVV_PROMOTE
1466 #define HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, LMUL, LMUL_IN, \
1468 template <size_t N> \
1469 HWY_API HWY_RVV_V(BASE, BITS, LMUL) \
1470 PromoteTo(HWY_RVV_D(BASE, BITS, N, SHIFT + ADD) d, \
1471 HWY_RVV_V(BASE_IN, BITS_IN, LMUL_IN) v) { \
1472 return OP##CHAR##BITS##LMUL(v, Lanes(d)); \
1475 #define HWY_RVV_PROMOTE_X2(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN) \
1476 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf2, -2, 1) \
1477 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf2, -1, 1) \
1478 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, m1, 0, 1) \
1479 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, m2, 1, 1) \
1480 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m4, 2, 1)
1482 #define HWY_RVV_PROMOTE_X4(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN) \
1483 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, mf2, mf8, -3, 2) \
1484 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m1, mf4, -2, 2) \
1485 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m2, mf2, -1, 2) \
1486 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m4, m1, 0, 2) \
1487 HWY_RVV_PROMOTE(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN, m8, m2, 1, 2)
1495 #undef HWY_RVV_PROMOTE_X4
1496 #undef HWY_RVV_PROMOTE_X2
1497 #undef HWY_RVV_PROMOTE
1500 template <
size_t N,
int kPow2>
1507 template <
size_t N,
int kPow2>
1514 template <
size_t N,
int kPow2>
1521 template <
size_t N,
int kPow2>
1526 const Rebind<uint16_t, decltype(
d)> du16;
1533 #define HWY_RVV_DEMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1535 template <size_t N> \
1536 HWY_API HWY_RVV_V(BASE, SEWH, LMULH) NAME( \
1537 HWY_RVV_D(BASE, SEWH, N, SHIFT - 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1538 return OP##CHAR##SEWH##LMULH(v, 0, Lanes(d)); \
1540 template <size_t N> \
1541 HWY_API HWY_RVV_V(BASE, SEWH, LMULH) NAME##Shr16( \
1542 HWY_RVV_D(BASE, SEWH, N, SHIFT - 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1543 return OP##CHAR##SEWH##LMULH(v, 16, Lanes(d)); \
1553 #define HWY_RVV_DEMOTE_I_TO_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1554 SHIFT, MLEN, NAME, OP) \
1555 template <size_t N> \
1556 HWY_API HWY_RVV_V(uint, SEWH, LMULH) NAME( \
1557 HWY_RVV_D(uint, SEWH, N, SHIFT - 1) d, HWY_RVV_V(int, SEW, LMUL) v) { \
1559 return detail::DemoteTo(d, detail::BitCastToUnsigned(detail::MaxS(v, 0))); \
1563 #undef HWY_RVV_DEMOTE_I_TO_U
1588 return vnclipu_wx_u8mf8(vnclipu_wx_u16mf4(
v, 0, avl), 0, avl);
1592 return vnclipu_wx_u8mf4(vnclipu_wx_u16mf2(
v, 0, avl), 0, avl);
1596 return vnclipu_wx_u8mf2(vnclipu_wx_u16m1(
v, 0, avl), 0, avl);
1600 return vnclipu_wx_u8m1(vnclipu_wx_u16m2(
v, 0, avl), 0, avl);
1604 return vnclipu_wx_u8m2(vnclipu_wx_u16m4(
v, 0, avl), 0, avl);
1633 #undef HWY_RVV_DEMOTE
1638 #define HWY_RVV_DEMOTE_F(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1639 SHIFT, MLEN, NAME, OP) \
1640 template <size_t N> \
1641 HWY_API HWY_RVV_V(BASE, SEWH, LMULH) NAME( \
1642 HWY_RVV_D(BASE, SEWH, N, SHIFT - 1) d, HWY_RVV_V(BASE, SEW, LMUL) v) { \
1643 return OP##SEWH##LMULH(v, Lanes(d)); \
1646 #if HWY_HAVE_FLOAT16
1652 #undef HWY_RVV_DEMOTE_F
1657 return vfncvt_rtz_x_f_w_i32mf2(
v,
Lanes(
d));
1661 return vfncvt_rtz_x_f_w_i32mf2(
v,
Lanes(
d));
1665 return vfncvt_rtz_x_f_w_i32m1(
v,
Lanes(
d));
1669 return vfncvt_rtz_x_f_w_i32m2(
v,
Lanes(
d));
1673 return vfncvt_rtz_x_f_w_i32m4(
v,
Lanes(
d));
1676 template <
size_t N,
int kPow2>
1680 const Rebind<uint32_t, decltype(
d)> du32;
1681 return detail::DemoteToShr16(du16,
BitCast(du32,
v));
1686 #define HWY_RVV_CONVERT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1687 SHIFT, MLEN, NAME, OP) \
1688 template <size_t N> \
1689 HWY_API HWY_RVV_V(BASE, SEW, LMUL) ConvertTo( \
1690 HWY_RVV_D(BASE, SEW, N, SHIFT) d, HWY_RVV_V(int, SEW, LMUL) v) { \
1691 return vfcvt_f_x_v_f##SEW##LMUL(v, Lanes(d)); \
1694 template <size_t N> \
1695 HWY_API HWY_RVV_V(int, SEW, LMUL) ConvertTo(HWY_RVV_D(int, SEW, N, SHIFT) d, \
1696 HWY_RVV_V(BASE, SEW, LMUL) v) { \
1697 return vfcvt_rtz_x_f_v_i##SEW##LMUL(v, Lanes(d)); \
1701 #undef HWY_RVV_CONVERT
1704 #define HWY_RVV_NEAREST(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1705 SHIFT, MLEN, NAME, OP) \
1706 HWY_API HWY_RVV_V(int, SEW, LMUL) NearestInt(HWY_RVV_V(BASE, SEW, LMUL) v) { \
1707 return vfcvt_x_f_v_i##SEW##LMUL(v, HWY_RVV_AVL(SEW, SHIFT)); \
1710 #undef HWY_RVV_NEAREST
1718 template <
typename T,
size_t N,
int kPow2>
1720 size_t lpb = 16 /
sizeof(T);
1721 if (IsFull(
d))
return lpb;
1725 if (kPow2 >= 0)
return lpb;
1730 template <
class D,
class V>
1736 template <
size_t kLanes,
class D>
1741 return LtS(
BitCast(di, idx_mod),
static_cast<TFromD<decltype(di)
>>(kLanes));
1745 #define HWY_RVV_SLIDE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1747 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1748 NAME(HWY_RVV_V(BASE, SEW, LMUL) dst, HWY_RVV_V(BASE, SEW, LMUL) src, \
1750 return v##OP##_vx_##CHAR##SEW##LMUL(dst, src, lanes, \
1751 HWY_RVV_AVL(SEW, SHIFT)); \
1757 #undef HWY_RVV_SLIDE
1762 template <
class D,
class V>
1768 template <
class D,
class V>
1770 return detail::SlideUp(lo, hi,
Lanes(
d) / 2);
1774 template <
class D,
class V>
1777 const auto lo_down = detail::SlideDown(lo, lo,
Lanes(
d) / 2);
1782 template <
class D,
class V>
1785 const auto hi_up = detail::SlideUp(hi, hi,
Lanes(
d) / 2);
1786 const auto lo_down = detail::SlideDown(lo, lo,
Lanes(
d) / 2);
1791 template <
class D2,
class V>
1793 return detail::SlideUp(detail::Ext(d2, lo), detail::Ext(d2, hi),
1799 template <
class D2,
class V>
1818 template <
class DH, hwy::EnableIf<detail::IsSupportedLMUL(DH())>* =
nullptr>
1828 template <
class DH,
class V,
1849 #define HWY_RVV_SLIDE1(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1851 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
1852 return v##OP##_##CHAR##SEW##LMUL(v, 0, HWY_RVV_AVL(SEW, SHIFT)); \
1859 #undef HWY_RVV_SLIDE1
1864 #define HWY_RVV_GET_LANE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
1865 SHIFT, MLEN, NAME, OP) \
1866 HWY_API HWY_RVV_T(BASE, SEW) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
1867 return v##OP##_s_##CHAR##SEW##LMUL##_##CHAR##SEW(v); \
1872 #undef HWY_RVV_GET_LANE
1877 const RebindToUnsigned<DFromV<V>> du;
1878 const auto is_even = detail::EqS(detail::AndS(
detail::Iota0(du), 1), 0);
1885 const V up = detail::Slide1Up(
v);
1892 const V down = detail::Slide1Down(
v);
1899 const RebindToUnsigned<DFromV<V>> du;
1900 constexpr
size_t kShift =
CeilLog2(16 /
sizeof(TFromV<V>));
1901 const auto idx_block = ShiftRight<kShift>(
detail::Iota0(du));
1902 const auto is_even = detail::EqS(detail::AndS(idx_block, 1), 0);
1912 const V down = detail::SlideDown(
v,
v, lpb);
1913 const V up = detail::SlideUp(
v,
v, lpb);
1919 template <
class D,
class VI>
1921 static_assert(
sizeof(TFromD<D>) ==
sizeof(TFromV<VI>),
"Index != lane");
1923 const auto indices =
BitCast(du, vec);
1924 #if HWY_IS_DEBUG_BUILD
1930 template <
class D,
typename TI>
1932 static_assert(
sizeof(TFromD<D>) ==
sizeof(TI),
"Index size must match lane");
1938 #define HWY_RVV_TABLE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
1940 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
1941 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(uint, SEW, LMUL) idx) { \
1942 return v##OP##_vv_##CHAR##SEW##LMUL(v, idx, HWY_RVV_AVL(SEW, SHIFT)); \
1946 #undef HWY_RVV_TABLE
1949 template <
class D,
class V>
1953 const auto idx = detail::AddS(
Add(iota, iota), 1);
1956 return detail::SlideUp(lo_odd, hi_odd,
Lanes(
d) / 2);
1960 template <
class D,
class V>
1964 const auto idx =
Add(iota, iota);
1967 return detail::SlideUp(lo_even, hi_even,
Lanes(
d) / 2);
1974 using TU =
TFromD<decltype(du)>;
1975 const size_t N =
Lanes(du);
1993 const Twice<decltype(
d)> d2;
1994 const Twice<decltype(d2)> d4;
1996 const auto vx = detail::Ext(d4, detail::Ext(d2,
v));
2003 template <
class D, HWY_IF_LANE_SIZE_D(D, 4), HWY_RVV_IF_POW2_IN(D, 0, 3)>
2012 const Twice<decltype(
d)> d2;
2013 const Twice<decltype(d2)> d4;
2015 const auto vx = detail::Ext(d4, detail::Ext(d2,
v));
2020 template <
class D,
class V = VFromD<D>, HWY_IF_LANE_SIZE_D(D, 8)>
2022 const V up = detail::Slide1Up(
v);
2023 const V down = detail::Slide1Down(
v);
2031 const RebindToUnsigned<D> du;
2040 const RebindToUnsigned<D> du;
2046 template <
class D,
class V = VFromD<D>>
2048 const Repartition<uint64_t, D> du64;
2049 const size_t N =
Lanes(du64);
2051 detail::ReverseSubS(
detail::Iota0(du64),
static_cast<uint64_t
>(
N - 1));
2053 const auto idx = detail::XorS(rev, 1);
2059 template <
typename T>
2060 struct CompressIsPartition {
2064 #define HWY_RVV_COMPRESS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
2065 SHIFT, MLEN, NAME, OP) \
2066 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
2067 NAME(HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_M(MLEN) mask) { \
2068 return v##OP##_vm_##CHAR##SEW##LMUL(mask, v, v, HWY_RVV_AVL(SEW, SHIFT)); \
2073 #undef HWY_RVV_COMPRESS
2076 template <
class V,
class M,
class D>
2084 template <
class V,
class M,
class D>
2088 detail::StoreN(count,
Compress(
v, mask),
d, unaligned);
2095 template <
size_t kBytes,
class D,
class V = VFromD<D>>
2098 const auto hi8 =
BitCast(d8, hi);
2099 const auto lo8 =
BitCast(d8, lo);
2100 const auto hi_up = detail::SlideUp(hi8, hi8, 16 - kBytes);
2101 const auto lo_down = detail::SlideDown(lo8, lo8, kBytes);
2107 template <
size_t kLanes,
class D,
class V = VFromD<D>>
2109 constexpr
size_t kLanesUp = 16 /
sizeof(
TFromV<V>) - kLanes;
2110 const auto hi_up = detail::SlideUp(hi, hi, kLanesUp);
2111 const auto lo_down = detail::SlideDown(lo, lo, kLanes);
2112 const auto is_lo = detail::FirstNPerBlock<kLanesUp>(
d);
2120 static_assert(
sizeof(
TFromD<decltype(
d)>) == 4,
"Defined for 32-bit types");
2123 return BitCast(
d,
Or(ShiftRight<32>(v64), ShiftLeft<32>(v64)));
2130 static_assert(
sizeof(
TFromD<decltype(
d)>) == 4,
"Defined for 32-bit types");
2131 return CombineShiftRightLanes<3>(
d,
v,
v);
2138 static_assert(
sizeof(
TFromD<decltype(
d)>) == 4,
"Defined for 32-bit types");
2139 return CombineShiftRightLanes<1>(
d,
v,
v);
2146 static_assert(
sizeof(
TFromD<decltype(
d)>) == 4,
"Defined for 32-bit types");
2147 return CombineShiftRightLanes<2>(
d,
v,
v);
2154 static_assert(
sizeof(
TFromD<decltype(
d)>) == 8,
"Defined for 64-bit types");
2155 return CombineShiftRightLanes<1>(
d,
v,
v);
2169 template <
typename T,
size_t N,
int kPow2>
2172 const Simd<T,
N, kPow2 - 1> dh;
2173 const Simd<T,
N, kPow2 - 2> dhh;
2174 return Ext(
d, Ext(dh, Ext(dhh,
v)));
2176 template <
typename T,
size_t N,
int kPow2>
2179 const Simd<T,
N, kPow2 - 1> dh;
2180 return Ext(
d, Ext(dh,
v));
2182 template <
typename T,
size_t N,
int kPow2>
2188 template <
typename T,
size_t N,
int kPow2>
2194 template <
typename T,
size_t N,
int kPow2>
2199 template <
typename T,
size_t N,
int kPow2>
2204 template <
typename T,
size_t N,
int kPow2>
2212 template <
class VT,
class VI>
2222 constexpr
int kPow2T =
Pow2(dt8);
2223 constexpr
int kPow2I =
Pow2(di8);
2230 if (kPow2T < kPow2I) {
2231 offsets = detail::AndS(offsets,
Lanes(dt8) - 1);
2237 template <
class VT,
class VI>
2241 const auto idx8 =
BitCast(di8, idx);
2247 template <
int kLane,
class V>
2253 idx = detail::AddS(idx, kLane);
2260 template <
size_t kLanes,
class D,
class V = VFromD<D>>
2263 using TI =
TFromD<decltype(di)>;
2264 const auto shifted = detail::SlideUp(
v,
v, kLanes);
2266 const auto idx_mod =
2268 const auto clear = detail::LtS(
BitCast(di, idx_mod),
static_cast<TI
>(kLanes));
2272 template <
size_t kLanes,
class V>
2274 return ShiftLeftLanes<kLanes>(DFromV<V>(),
v);
2279 template <
int kBytes,
class D>
2285 template <
int kBytes,
class V>
2287 return ShiftLeftBytes<kBytes>(DFromV<V>(),
v);
2291 template <
size_t kLanes,
typename T,
size_t N,
int kPow2,
2292 class V = VFromD<Simd<T, N, kPow2>>>
2295 using TI =
TFromD<decltype(di)>;
2297 if (
N <= 16 /
sizeof(T)) {
2301 const auto shifted = detail::SlideDown(
v,
v, kLanes);
2304 const auto idx_mod = detail::AndS(
detail::Iota0(di), lpb - 1);
2306 detail::LtS(
BitCast(di, idx_mod),
static_cast<TI
>(lpb - kLanes));
2311 template <
int kBytes,
class D,
class V = VFromD<D>>
2319 template <
class D,
class V>
2321 static_assert(
IsSame<TFromD<D>, TFromV<V>>(),
"D/V mismatch");
2324 const auto idx_mod =
2327 const auto is_even = detail::EqS(detail::AndS(i, 1), 0
u);
2339 template <
class D,
class V>
2345 const auto idx_mod = ShiftRight<1>(detail::AndS(i, lpb - 1));
2347 const auto idx = detail::AddS(idx_lower, lpb / 2);
2348 const auto is_even = detail::EqS(detail::AndS(i, 1), 0
u);
2355 template <
class V,
class DW = RepartitionToW
ide<DFromV<V>>>
2357 const RepartitionToNarrow<DW> dn;
2358 static_assert(
IsSame<
TFromD<decltype(dn)>, TFromV<V>>(),
"D/V mismatch");
2362 template <
class V,
class DW = RepartitionToW
ide<DFromV<V>>>
2368 template <
class DW,
class V>
2370 const RepartitionToNarrow<DW> dn;
2371 static_assert(
IsSame<
TFromD<decltype(dn)>, TFromV<V>>(),
"D/V mismatch");
2378 #define HWY_RVV_REDUCE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, \
2380 template <class D> \
2381 HWY_API HWY_RVV_V(BASE, SEW, LMUL) \
2382 NAME(D d, HWY_RVV_V(BASE, SEW, LMUL) v, HWY_RVV_V(BASE, SEW, m1) v0) { \
2383 return Set(d, GetLane(v##OP##_vs_##CHAR##SEW##LMUL##_##CHAR##SEW##m1( \
2384 v0, v, v0, Lanes(d)))); \
2397 return detail::RedSum(
d,
v, v0);
2411 const auto neutral =
Set(d1, HighestValue<T>());
2412 return detail::RedMin(
d,
v, neutral);
2426 const auto neutral =
Set(d1, LowestValue<T>());
2427 return detail::RedMax(
d,
v, neutral);
2430 #undef HWY_RVV_REDUCE
2437 template <
typename V,
class D = DFromV<V>, HWY_IF_LANES_ARE(u
int8_t, V),
2438 hwy::EnableIf<Pow2(D()) < 1 || MaxLanes(D()) < 16>* =
nullptr>
2439 HWY_API V PopulationCount(V v) {
2441 v = Sub(v, detail::AndS(ShiftRight<1>(v), 0x55));
2442 v = Add(detail::AndS(ShiftRight<2>(v), 0x33), detail::AndS(v, 0x33));
2443 return detail::AndS(Add(v, ShiftRight<4>(v)), 0x0F);
2450 const auto loaded =
Load(
d, p);
2470 #define HWY_RVV_LOAD_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP) \
2471 HWY_INLINE HWY_RVV_M(MLEN) \
2472 NAME(hwy::SizeTag<MLEN> , const uint8_t* bits, size_t N) { \
2473 return OP##_v_b##MLEN(bits, N); \
2476 #undef HWY_RVV_LOAD_MASK_BITS
2479 template <
class D,
class MT = detail::MaskTag<D>>
2486 #define HWY_RVV_STORE_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP) \
2487 template <class D> \
2488 HWY_API size_t NAME(D d, HWY_RVV_M(MLEN) m, uint8_t* bits) { \
2489 const size_t N = Lanes(d); \
2490 OP##_v_b##MLEN(bits, m, N); \
2493 constexpr bool kLessThan8 = \
2494 detail::ScaleByPower(16 / sizeof(TFromD<D>), Pow2(d)) < 8; \
2495 if (MaxLanes(d) < 8 || (kLessThan8 && N < 8)) { \
2496 const int mask = (1 << N) - 1; \
2497 bits[0] = static_cast<uint8_t>(bits[0] & mask); \
2499 return (N + 7) / 8; \
2502 #undef HWY_RVV_STORE_MASK_BITS
2520 template <
class D, HWY_IF_NOT_LANE_SIZE_D(D, 1)>
2522 const RebindToSigned<D> di;
2523 using TI =
TFromD<decltype(di)>;
2528 template <
class D, HWY_IF_LANE_SIZE_D(D, 1)>
2530 const auto zero =
Zero(
d);
2531 const auto one =
Set(
d, 1);
2532 return Eq(detail::SlideUp(one, zero, n), one);
2537 template <
class V, HWY_IF_SIGNED_V(V)>
2539 return detail::ReverseSubS(
v, 0);
2543 #define HWY_RVV_RETV_ARGV2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, \
2544 SHIFT, MLEN, NAME, OP) \
2545 HWY_API HWY_RVV_V(BASE, SEW, LMUL) NAME(HWY_RVV_V(BASE, SEW, LMUL) v) { \
2546 return v##OP##_vv_##CHAR##SEW##LMUL(v, v, HWY_RVV_AVL(SEW, SHIFT)); \
2553 template <
class V, HWY_IF_SIGNED_V(V)>
2560 #undef HWY_RVV_RETV_ARGV2
2576 enum RoundingModes { kNear, kTrunc, kDown, kUp };
2590 const auto int_f =
ConvertTo(df, integer);
2602 const auto int_f =
ConvertTo(df, integer);
2610 asm volatile(
"fsrm %0" ::
"r"(detail::kUp));
2611 const auto ret =
Round(
v);
2612 asm volatile(
"fsrm %0" ::
"r"(detail::kNear));
2619 asm volatile(
"fsrm %0" ::
"r"(detail::kDown));
2620 const auto ret =
Round(
v);
2621 asm volatile(
"fsrm %0" ::
"r"(detail::kNear));
2627 template <
class D, HWY_IF_UNSIGNED_D(D)>
2632 template <
class D, HWY_IF_SIGNED_D(D)>
2634 const RebindToUnsigned<D> du;
2638 template <
class D, HWY_IF_FLOAT_D(D)>
2640 const RebindToUnsigned<D> du;
2641 const RebindToSigned<D> di;
2647 template <
class V, HWY_IF_LANE_SIZE_V(V, 4),
class D = DFromV<V>,
2648 class DW = RepartitionToW
ide<D>>
2650 const auto lo =
Mul(a, b);
2656 template <
class V, HWY_IF_LANE_SIZE_V(V, 8)>
2660 return OddEven(detail::Slide1Up(hi), lo);
2663 template <
class V, HWY_IF_LANE_SIZE_V(V, 8)>
2667 return OddEven(hi, detail::Slide1Down(lo));
2672 template <
size_t N,
int kPow2>
2679 const VFromD<decltype(du32)> b_in_even = ShiftRight<16>(
BitCast(du32, b));
2688 template <
size_t N,
int kPow2>
2692 const VFromD<decltype(df32)> sum0,
2693 VFromD<decltype(df32)>& sum1)
2694 ->
VFromD<decltype(df32)> {
2697 using VU32 =
VFromD<decltype(du32)>;
2698 const VFromD<decltype(du16)> zero =
Zero(du16);
2728 const VFromD<D> ltLx = detail::Slide1Up(ltHL);
2738 const VFromD<D> aXH = detail::Slide1Down(a);
2739 const VFromD<D> bXH = detail::Slide1Down(b);
2740 const VFromD<D> minHL =
Min(a, b);
2741 const MFromD<D> ltXH =
Lt(aXH, bXH);
2742 const MFromD<D> eqXH =
Eq(aXH, bXH);
2752 const VFromD<D> aXH = detail::Slide1Down(a);
2753 const VFromD<D> bXH = detail::Slide1Down(b);
2754 const VFromD<D> maxHL =
Max(a, b);
2755 const MFromD<D> ltXH =
Lt(aXH, bXH);
2756 const MFromD<D> eqXH =
Eq(aXH, bXH);
2768 #undef HWY_RVV_FOREACH
2769 #undef HWY_RVV_FOREACH_08_ALL
2770 #undef HWY_RVV_FOREACH_08_ALL_VIRT
2771 #undef HWY_RVV_FOREACH_08_DEMOTE
2772 #undef HWY_RVV_FOREACH_08_DEMOTE_VIRT
2773 #undef HWY_RVV_FOREACH_08_EXT
2774 #undef HWY_RVV_FOREACH_08_EXT_VIRT
2775 #undef HWY_RVV_FOREACH_08_TRUNC
2776 #undef HWY_RVV_FOREACH_08_VIRT
2777 #undef HWY_RVV_FOREACH_16_ALL
2778 #undef HWY_RVV_FOREACH_16_ALL_VIRT
2779 #undef HWY_RVV_FOREACH_16_DEMOTE
2780 #undef HWY_RVV_FOREACH_16_DEMOTE_VIRT
2781 #undef HWY_RVV_FOREACH_16_EXT
2782 #undef HWY_RVV_FOREACH_16_EXT_VIRT
2783 #undef HWY_RVV_FOREACH_16_TRUNC
2784 #undef HWY_RVV_FOREACH_16_VIRT
2785 #undef HWY_RVV_FOREACH_32_ALL
2786 #undef HWY_RVV_FOREACH_32_ALL_VIRT
2787 #undef HWY_RVV_FOREACH_32_DEMOTE
2788 #undef HWY_RVV_FOREACH_32_DEMOTE_VIRT
2789 #undef HWY_RVV_FOREACH_32_EXT
2790 #undef HWY_RVV_FOREACH_32_EXT_VIRT
2791 #undef HWY_RVV_FOREACH_32_TRUNC
2792 #undef HWY_RVV_FOREACH_32_VIRT
2793 #undef HWY_RVV_FOREACH_64_ALL
2794 #undef HWY_RVV_FOREACH_64_ALL_VIRT
2795 #undef HWY_RVV_FOREACH_64_DEMOTE
2796 #undef HWY_RVV_FOREACH_64_DEMOTE_VIRT
2797 #undef HWY_RVV_FOREACH_64_EXT
2798 #undef HWY_RVV_FOREACH_64_EXT_VIRT
2799 #undef HWY_RVV_FOREACH_64_TRUNC
2800 #undef HWY_RVV_FOREACH_64_VIRT
2801 #undef HWY_RVV_FOREACH_B
2802 #undef HWY_RVV_FOREACH_F
2803 #undef HWY_RVV_FOREACH_F16
2804 #undef HWY_RVV_FOREACH_F32
2805 #undef HWY_RVV_FOREACH_F3264
2806 #undef HWY_RVV_FOREACH_F64
2807 #undef HWY_RVV_FOREACH_I
2808 #undef HWY_RVV_FOREACH_I08
2809 #undef HWY_RVV_FOREACH_I16
2810 #undef HWY_RVV_FOREACH_I163264
2811 #undef HWY_RVV_FOREACH_I32
2812 #undef HWY_RVV_FOREACH_I64
2813 #undef HWY_RVV_FOREACH_U
2814 #undef HWY_RVV_FOREACH_U08
2815 #undef HWY_RVV_FOREACH_U16
2816 #undef HWY_RVV_FOREACH_U163264
2817 #undef HWY_RVV_FOREACH_U32
2818 #undef HWY_RVV_FOREACH_U64
2819 #undef HWY_RVV_FOREACH_UI
2820 #undef HWY_RVV_FOREACH_UI08
2821 #undef HWY_RVV_FOREACH_UI16
2822 #undef HWY_RVV_FOREACH_UI163264
2823 #undef HWY_RVV_FOREACH_UI32
2824 #undef HWY_RVV_FOREACH_UI3264
2825 #undef HWY_RVV_FOREACH_UI64
2827 #undef HWY_RVV_RETV_ARGV
2828 #undef HWY_RVV_RETV_ARGVS
2829 #undef HWY_RVV_RETV_ARGVV
#define HWY_MAX(a, b)
Definition: base.h:128
#define HWY_RESTRICT
Definition: base.h:63
#define HWY_API
Definition: base.h:122
#define HWY_MIN(a, b)
Definition: base.h:127
#define HWY_INLINE
Definition: base.h:64
#define HWY_DASSERT(condition)
Definition: base.h:193
HWY_INLINE VFromD< DU > BitCastToUnsigned(V v)
Definition: rvv-inl.h:655
HWY_INLINE Mask128< float, N > UseInt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:2860
HWY_INLINE V OffsetsOf128BitBlocks(const D d, const V iota0)
Definition: arm_sve-inl.h:1664
HWY_INLINE Mask128< T, N > MaskFromVec(hwy::SizeTag< 1 >, const Vec128< T, N > v)
Definition: x86_128-inl.h:1357
constexpr size_t LanesPerBlock(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:1503
svbool_t FirstNPerBlock(D d)
Definition: arm_sve-inl.h:1670
HWY_INLINE auto ChangeLMUL(Simd< T, N, kPow2 > d, VFromD< Simd< T, N, kPow2 - 3 >> v) -> VFromD< decltype(d)>
Definition: rvv-inl.h:2170
HWY_INLINE VFromD< DU > Iota0(const D)
Definition: rvv-inl.h:676
constexpr size_t ScaleByPower(size_t N, int pow2)
Definition: ops/shared-inl.h:115
constexpr bool IsSupportedLMUL(D d)
Definition: rvv-inl.h:1811
HWY_INLINE Vec128< uint8_t, N > BitCastFromByte(Simd< uint8_t, N, 0 >, Vec128< uint8_t, N > v)
Definition: arm_neon-inl.h:601
HWY_INLINE Vec128< uint8_t, N > BitCastToByte(Vec128< uint8_t, N > v)
Definition: arm_neon-inl.h:574
HWY_INLINE Mask128< T, N > LoadMaskBits(Simd< T, N, 0 > d, uint64_t mask_bits)
Definition: arm_neon-inl.h:4467
d
Definition: rvv-inl.h:1656
sseg3 sseg3 sseg4 mf2
Definition: rvv-inl.h:1432
HWY_API Vec128< T, N > CopySign(const Vec128< T, N > magn, const Vec128< T, N > sign)
Definition: arm_neon-inl.h:1648
HWY_API Vec128< T, N > OddEvenBlocks(Vec128< T, N >, Vec128< T, N > even)
Definition: arm_neon-inl.h:4038
decltype(MaskFromVec(Zero(D()))) MFromD
Definition: rvv-inl.h:1120
HWY_API Vec128< uint8_t > Combine(Full128< uint8_t >, Vec64< uint8_t > hi, Vec64< uint8_t > lo)
Definition: arm_neon-inl.h:3709
HWY_API Mask128< TTo, N > RebindMask(Simd< TTo, N, 0 > dto, Mask128< TFrom, N > m)
Definition: arm_neon-inl.h:1688
HWY_API Vec128< T, N > DupOdd(Vec128< T, N > v)
Definition: arm_neon-inl.h:4003
HWY_API Vec128< float > ApproximateReciprocal(const Vec128< float > v)
Definition: arm_neon-inl.h:1225
HWY_API VFromD< DW > ZipLower(V a, V b)
Definition: arm_neon-inl.h:3672
HWY_API uint8_t GetLane(const Vec128< uint8_t, 16 > v)
Definition: arm_neon-inl.h:767
HWY_API Vec128< T > Shuffle1032(const Vec128< T > v)
Definition: arm_neon-inl.h:3531
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5252
HWY_API Vec128< uint64_t > InterleaveLower(const Vec128< uint64_t > a, const Vec128< uint64_t > b)
Definition: arm_neon-inl.h:3581
HWY_API Vec128< T > Shuffle2103(const Vec128< T > v)
Definition: arm_neon-inl.h:3547
HWY_API Vec128< float, N > Round(const Vec128< float, N > v)
Definition: arm_neon-inl.h:2878
HWY_API Vec128< T, N > ZeroExtendVector(Simd< T, N, 0 > d, Vec128< T, N/2 > lo)
Definition: arm_neon-inl.h:3769
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5244
RepartitionToNarrow< RebindToUnsigned< DF > > DU16FromDF
Definition: rvv-inl.h:2686
HWY_API V128 CombineShiftRightBytes(Full128< T > d, V128 hi, V128 lo)
Definition: arm_neon-inl.h:2999
HWY_API auto Gt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5257
HWY_API Vec128< T, N > ShiftLeftLanes(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:3102
_
Definition: rvv-inl.h:1405
HWY_API Mask128< T, N > FirstN(const Simd< T, N, 0 > d, size_t num)
Definition: arm_neon-inl.h:1896
HWY_API size_t StoreMaskBits(Simd< T, N, 0 >, const Mask128< T, N > mask, uint8_t *bits)
Definition: arm_neon-inl.h:4761
HWY_API Vec128< float, N > MulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1290
HWY_API void Stream(const Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2416
HWY_API Vec128< int8_t > Abs(const Vec128< int8_t > v)
Definition: arm_neon-inl.h:1604
Repartition< MakeWide< TFromD< D > >, D > RepartitionToWide
Definition: ops/shared-inl.h:210
HWY_API bool AllTrue(const Simd< T, N, 0 > d, const Mask128< T, N > m)
Definition: arm_neon-inl.h:4790
HWY_API Vec128< T, N > SumOfLanes(Simd< T, N, 0 >, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4437
HWY_API Vec128< T, N > BroadcastSignBit(const Vec128< T, N > v)
Definition: arm_neon-inl.h:1665
V Shl(V a, V b)
Definition: arm_neon-inl.h:5235
HWY_RVV_FOREACH_F64(HWY_RVV_DEMOTE_F, DemoteTo, vfncvt_rod_f_f_w_f, _DEMOTE_VIRT) template< size_t N > HWY_API vint32mf2_t DemoteTo(Simd< int32_t
HWY_API auto Ge(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5261
HWY_API Vec128< float > AbsDiff(const Vec128< float > a, const Vec128< float > b)
Definition: arm_neon-inl.h:1264
StoreInterleaved3
Definition: rvv-inl.h:1405
HWY_API Vec128< uint32_t > ConcatOdd(Full128< uint32_t >, Vec128< uint32_t > hi, Vec128< uint32_t > lo)
Definition: arm_neon-inl.h:3903
HWY_API Vec128< uint64_t, N > Min(const Vec128< uint64_t, N > a, const Vec128< uint64_t, N > b)
Definition: arm_neon-inl.h:1957
HWY_API Vec128< uint64_t, N > Max(const Vec128< uint64_t, N > a, const Vec128< uint64_t, N > b)
Definition: arm_neon-inl.h:1995
HWY_API Mask128< T, N > MaskFromVec(const Vec128< T, N > v)
Definition: arm_neon-inl.h:1675
HWY_RVV_STORE3(uint, u, 8, _, _, mf8, _, _, -3, 64, StoreInterleaved3, sseg3) HWY_RVV_STORE3(uint
HWY_API Vec128< T, N > ConcatUpperUpper(const Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:3842
Rebind< MakeUnsigned< TFromD< D > >, D > RebindToUnsigned
Definition: ops/shared-inl.h:201
HWY_API Vec128< T, N > GatherIndex(const Simd< T, N, 0 > d, const T *HWY_RESTRICT base, const Vec128< Index, N > index)
Definition: arm_neon-inl.h:4284
HWY_INLINE constexpr HWY_MAYBE_UNUSED int Pow2(D)
Definition: ops/shared-inl.h:247
HWY_INLINE Vec128< uint64_t > MulOdd(Vec128< uint64_t > a, Vec128< uint64_t > b)
Definition: arm_neon-inl.h:4159
HWY_API Vec1< uint8_t > SaturatedAdd(const Vec1< uint8_t > a, const Vec1< uint8_t > b)
Definition: scalar-inl.h:457
StoreInterleaved4
Definition: rvv-inl.h:1428
HWY_API Vec128< T > Shuffle0321(const Vec128< T > v)
Definition: arm_neon-inl.h:3541
HWY_API Vec128< uint32_t > ConcatEven(Full128< uint32_t >, Vec128< uint32_t > hi, Vec128< uint32_t > lo)
Definition: arm_neon-inl.h:3945
HWY_API Vec128< T, N > Load(Simd< T, N, 0 > d, const T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2205
HWY_API Vec128< T, N > ConcatLowerUpper(const Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:3869
HWY_API Vec128< int64_t > Neg(const Vec128< int64_t > v)
Definition: arm_neon-inl.h:904
HWY_API Vec128< T, N > Zero(Simd< T, N, 0 > d)
Definition: arm_neon-inl.h:733
HWY_API Vec128< int64_t > MulEven(Vec128< int32_t > a, Vec128< int32_t > b)
Definition: arm_neon-inl.h:4119
HWY_API size_t Lanes(Simd< T, N, kPow2 > d)
Definition: arm_sve-inl.h:218
HWY_API Vec128< bfloat16_t, 2 *N > ReorderDemote2To(Simd< bfloat16_t, 2 *N, 0 > dbf16, Vec128< float, N > a, Vec128< float, N > b)
Definition: arm_neon-inl.h:4060
HWY_API Vec128< T, N > MaskedLoad(Mask128< T, N > m, Simd< T, N, 0 > d, const T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2210
HWY_API Vec128< T, N/2 > LowerHalf(const Vec128< T, N > v)
Definition: arm_neon-inl.h:2952
typename D::Twice Twice
Definition: ops/shared-inl.h:220
Rebind< MakeSigned< TFromD< D > >, D > RebindToSigned
Definition: ops/shared-inl.h:199
HWY_API Vec32< uint8_t > U8FromU32(const Vec128< uint32_t > v)
Definition: arm_neon-inl.h:2748
HWY_API Vec128< float, N > ReorderWidenMulAccumulate(Simd< float, N, 0 > df32, Vec128< bfloat16_t, 2 *N > a, Vec128< bfloat16_t, 2 *N > b, const Vec128< float, N > sum0, Vec128< float, N > &sum1)
Definition: arm_neon-inl.h:3688
HWY_API Vec128< T, N > IfVecThenElse(Vec128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition: arm_neon-inl.h:1505
HWY_API void BlendedStore(Vec128< T, N > v, Mask128< T, N > m, Simd< T, N, 0 > d, T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2402
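FirstN, MaskedLoad and BlendedStore together handle a partial final vector without a scalar tail loop. A sketch under the assumptions that p is vector-aligned and its allocation is padded to a whole number of vectors (on some targets MaskedLoad may still read the masked-off lanes); AddScalar is a made-up name.

#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

// Adds `delta` to every element, including a partial final vector.
void AddScalar(float* HWY_RESTRICT p, size_t count, float delta) {
  const hn::ScalableTag<float> d;
  const size_t N = hn::Lanes(d);
  const auto vdelta = hn::Set(d, delta);
  size_t i = 0;
  for (; i + N <= count; i += N) {
    hn::Store(hn::Add(hn::Load(d, p + i), vdelta), d, p + i);
  }
  if (i < count) {
    const auto m = hn::FirstN(d, count - i);     // remaining lanes only
    const auto v = hn::MaskedLoad(m, d, p + i);  // masked-off lanes become 0
    hn::BlendedStore(hn::Add(v, vdelta), m, d, p + i);
  }
}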
HWY_API size_t CountTrue(Full128< T >, const Mask128< T > mask)
Definition: arm_neon-inl.h:4742
HWY_API Vec128< T, N > VecFromMask(Simd< T, N, 0 > d, const Mask128< T, N > v)
Definition: arm_neon-inl.h:1681
HWY_API intptr_t FindFirstTrue(const Simd< T, N, 0 >, const Mask128< T, N > mask)
Definition: arm_neon-inl.h:4753
HWY_API Vec128< float > ConvertTo(Full128< float >, const Vec128< int32_t > v)
Definition: arm_neon-inl.h:2788
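ConvertTo changes between integer and floating-point lanes of the same width, and NearestInt rounds back to int32. A sketch with the made-up name RoundTripConvert, assuming count is a multiple of Lanes(df).

#include <cstdint>

#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

// Converts int32 to float, then rounds back to the nearest int32.
void RoundTripConvert(const int32_t* HWY_RESTRICT in,
                      int32_t* HWY_RESTRICT out, size_t count) {
  const hn::ScalableTag<float> df;
  const hn::RebindToSigned<hn::ScalableTag<float>> di;  // int32, same lanes
  for (size_t i = 0; i < count; i += hn::Lanes(df)) {
    const auto f = hn::ConvertTo(df, hn::LoadU(di, in + i));
    hn::StoreU(hn::NearestInt(f), di, out + i);
  }
}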
HWY_API Vec128< T, N > DupEven(Vec128< T, N > v)
Definition: arm_neon-inl.h:3987
HWY_API Vec128< T, N > IfThenElseZero(const Mask128< T, N > mask, const Vec128< T, N > yes)
Definition: arm_neon-inl.h:1711
HWY_API V Add(V a, V b)
Definition: arm_neon-inl.h:5217
HWY_API bool AllFalse(const Full128< T > d, const Mask128< T > m)
Definition: arm_neon-inl.h:4771
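CountTrue pairs naturally with the comparison wrappers (Eq, Lt, Gt, ...) for counting matches. An illustrative sketch with the made-up name CountMatches, assuming count is a multiple of Lanes(d).

#include <cstdint>

#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

// Counts lanes equal to `key`.
size_t CountMatches(const int32_t* HWY_RESTRICT p, size_t count, int32_t key) {
  const hn::ScalableTag<int32_t> d;
  const auto vkey = hn::Set(d, key);
  size_t matches = 0;
  for (size_t i = 0; i < count; i += hn::Lanes(d)) {
    matches += hn::CountTrue(d, hn::Eq(hn::LoadU(d, p + i), vkey));
  }
  return matches;
}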
HWY_API Vec128< T, N > TableLookupLanes(Vec128< T, N > v, Indices128< T, N > idx)
Definition: arm_neon-inl.h:3419
HWY_API Vec128< T, N > Reverse4(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:3490
HWY_API Vec128< float, N > Floor(const Vec128< float, N > v)
Definition: arm_neon-inl.h:2909
HWY_API Vec128< float, N > MulSub(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > sub)
Definition: arm_neon-inl.h:1344
HWY_API Vec128< T, N > CopySignToAbs(const Vec128< T, N > abs, const Vec128< T, N > sign)
Definition: arm_neon-inl.h:1656
HWY_API Vec128< T, N > ZeroIfNegative(Vec128< T, N > v)
Definition: arm_neon-inl.h:1735
HWY_API void StoreU(const Vec128< uint8_t > v, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2224
constexpr size_t MLenFromD(Simd< T, N, kPow2 >)
Definition: rvv-inl.h:43
HWY_API Vec128< float, N > Ceil(const Vec128< float, N > v)
Definition: arm_neon-inl.h:2895
Repartition< MakeNarrow< TFromD< D > >, D > RepartitionToNarrow
Definition: ops/shared-inl.h:212
HWY_API Indices128< T, N > IndicesFromVec(Simd< T, N, 0 > d, Vec128< TI, N > vec)
Definition: arm_neon-inl.h:3373
HWY_API Vec128< T, N > SwapAdjacentBlocks(Vec128< T, N > v)
Definition: arm_neon-inl.h:4045
HWY_API Vec128< T, N > ShiftLeftBytes(Simd< T, N, 0 >, Vec128< T, N > v)
Definition: arm_neon-inl.h:3091
HWY_INLINE VFromD< D > Min128(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:5203
HWY_API Vec128< T, N > Reverse2(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:3461
svuint16_t Set(Simd< bfloat16_t, N, kPow2 > d, bfloat16_t arg)
Definition: arm_sve-inl.h:282
HWY_API Vec128< T, N > Reverse8(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:3513
HWY_API Vec128< T, N > MaxOfLanes(Simd< T, N, 0 >, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4445
Vec128< T, N > Iota(const Simd< T, N, 0 > d, const T2 first)
Definition: arm_neon-inl.h:757
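Iota generates ascending per-lane values starting from a given offset, which is handy for building index arrays. A small sketch; FillIndices is a made-up name and count is assumed to be a multiple of Lanes(d).

#include <cstdint>

#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

// Fills p[i] = i for i in [0, count).
void FillIndices(int32_t* HWY_RESTRICT p, size_t count) {
  const hn::ScalableTag<int32_t> d;
  const size_t N = hn::Lanes(d);
  for (size_t i = 0; i < count; i += N) {
    hn::StoreU(hn::Iota(d, static_cast<int32_t>(i)), d, p + i);
  }
}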
HWY_API Vec1< uint8_t > AverageRound(const Vec1< uint8_t > a, const Vec1< uint8_t > b)
Definition: scalar-inl.h:510
HWY_API Vec1< T > ShiftRight(const Vec1< T > v)
Definition: scalar-inl.h:345
HWY_API Mask128< T, N > LoadMaskBits(Simd< T, N, 0 > d, const uint8_t *HWY_RESTRICT bits)
Definition: arm_neon-inl.h:4510
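LoadMaskBits and StoreMaskBits convert between a mask and a packed bit array (one bit per lane), e.g. for serializing masks. A sketch with the made-up name RoundTripMask; the buffer size is derived from Lanes(d).

#include <cstdint>
#include <vector>

#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

// Packs a mask into bits (one per lane), then restores an equal mask.
void RoundTripMask(size_t num) {
  const hn::ScalableTag<float> d;
  const auto m = hn::FirstN(d, num);  // example mask
  std::vector<uint8_t> bits((hn::Lanes(d) + 7) / 8);  // one bit per lane
  (void)hn::StoreMaskBits(d, m, bits.data());  // returns bytes written
  const auto restored = hn::LoadMaskBits(d, bits.data());
  // restored equals m, e.g. CountTrue(d, restored) == CountTrue(d, m).
  (void)restored;
}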
HWY_API Vec128< T > Shuffle01(const Vec128< T > v)
Definition: arm_neon-inl.h:3535
HWY_API Mask128< uint64_t, N > TestBit(Vec128< uint64_t, N > v, Vec128< uint64_t, N > bit)
Definition: arm_neon-inl.h:1917
HWY_API Vec128< uint8_t > LoadU(Full128< uint8_t >, const uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2031
HWY_API Vec128< int16_t > MulHigh(const Vec128< int16_t > a, const Vec128< int16_t > b)
Definition: arm_neon-inl.h:1175
HWY_API Vec1< uint8_t > SaturatedSub(const Vec1< uint8_t > a, const Vec1< uint8_t > b)
Definition: scalar-inl.h:484
HWY_INLINE Mask128< T, N > Lt128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:5172
HWY_API Vec128< T, N > OrAnd(Vec128< T, N > o, Vec128< T, N > a1, Vec128< T, N > a2)
Definition: arm_neon-inl.h:1498
HWY_API Vec128< T, N > IfNegativeThenElse(Vec128< T, N > v, Vec128< T, N > yes, Vec128< T, N > no)
Definition: arm_neon-inl.h:1724
HWY_API Vec128< T, N > ConcatUpperLower(Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:3895
HWY_API Vec128< T, N > And(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1440
typename detail::ScalableTagChecker< T, kPow2 >::type ScalableTag
Definition: ops/shared-inl.h:162
HWY_API Vec128< T, N > BitCast(Simd< T, N, 0 > d, Vec128< FromT, N *sizeof(T)/sizeof(FromT)> v)
Definition: arm_neon-inl.h:710
HWY_API Vec128< int16_t > MulFixedPoint15(Vec128< int16_t > a, Vec128< int16_t > b)
Definition: arm_neon-inl.h:1211
HWY_API void ScatterOffset(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT base, const Vec128< Offset, N > offset)
Definition: arm_neon-inl.h:4231
HWY_API VFromD< DW > ZipUpper(DW dw, V a, V b)
Definition: arm_neon-inl.h:3681
HWY_API Vec128< T, N > ConcatLowerLower(const Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:3777
HWY_API V Sub(V a, V b)
Definition: arm_neon-inl.h:5221
typename D::template Rebind< T > Rebind
Definition: ops/shared-inl.h:196
HWY_API Vec128< T, N > Undefined(Simd< T, N, 0 >)
Definition: arm_neon-inl.h:747
HWY_INLINE constexpr HWY_MAYBE_UNUSED size_t MaxLanes(D)
Definition: ops/shared-inl.h:271
HWY_API size_t CompressBitsStore(Vec128< T, N > v, const uint8_t *HWY_RESTRICT bits, Simd< T, N, 0 > d, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:5077
HWY_API V InterleaveUpper(Simd< T, N, 0 >, V a, V b)
Definition: arm_neon-inl.h:3656
HWY_API Vec128< T, N > GatherOffset(const Simd< T, N, 0 > d, const T *HWY_RESTRICT base, const Vec128< Offset, N > offset)
Definition: arm_neon-inl.h:4267
HWY_API size_t CompressBlendedStore(Vec128< T, N > v, Mask128< T, N > m, Simd< T, N, 0 > d, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:5061
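CompressBlendedStore packs the lanes selected by a mask to the front and writes only those lanes, which makes stream filtering straightforward. Illustrative sketch only; KeepPositive is a made-up name and count is assumed to be a multiple of Lanes(d).

#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

// Copies the positive elements of `in` to `out`; returns how many were kept.
size_t KeepPositive(const float* HWY_RESTRICT in, float* HWY_RESTRICT out,
                    size_t count) {
  const hn::ScalableTag<float> d;
  size_t kept = 0;
  for (size_t i = 0; i < count; i += hn::Lanes(d)) {
    const auto v = hn::LoadU(d, in + i);
    const auto m = hn::Gt(v, hn::Zero(d));
    // Writes only the selected lanes, leaving memory after them untouched.
    kept += hn::CompressBlendedStore(v, m, d, out + kept);
  }
  return kept;
}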
HWY_API Vec128< T, N > IfThenZeroElse(const Mask128< T, N > mask, const Vec128< T, N > no)
Definition: arm_neon-inl.h:1718
HWY_API Vec128< T, N > Xor(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1489
HWY_INLINE VFromD< D > Max128(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:5208
HWY_API Vec1< T > ShiftLeft(const Vec1< T > v)
Definition: scalar-inl.h:339
HWY_API auto Le(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5266
decltype(detail::DeduceD()(V())) DFromV
Definition: arm_neon-inl.h:555
HWY_API Vec128< int32_t, N > NearestInt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:2939
HWY_API Indices128< T, N > SetTableIndices(Simd< T, N, 0 > d, const TI *idx)
Definition: arm_neon-inl.h:3413
HWY_API void ScatterIndex(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT base, const Vec128< Index, N > index)
Definition: arm_neon-inl.h:4249
HWY_API Vec128< T > Not(const Vec128< T > v)
Definition: arm_neon-inl.h:1422
HWY_API Vec128< float, N > NegMulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1323
HWY_API Vec64< uint32_t > Shuffle2301(const Vec64< uint32_t > v)
Definition: arm_neon-inl.h:1778
V Shr(V a, V b)
Definition: arm_neon-inl.h:5239
decltype(Zero(D())) VFromD
Definition: arm_neon-inl.h:743
HWY_API Vec128< T, N > LoadDup128(Simd< T, N, 0 > d, const T *const HWY_RESTRICT p)
Definition: arm_neon-inl.h:2217
HWY_API Vec128< T, N > OddEven(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:4019
HWY_API Vec128< uint16_t > Broadcast(const Vec128< uint16_t > v)
Definition: arm_neon-inl.h:3285
HWY_INLINE Vec128< T, N > CompressBits(Vec128< T, N > v, const uint8_t *HWY_RESTRICT bits)
Definition: arm_neon-inl.h:5038
HWY_API Vec128< T > Shuffle0123(const Vec128< T > v)
Definition: arm_neon-inl.h:3553
HWY_API Vec128< float, N > Trunc(const Vec128< float, N > v)
Definition: arm_neon-inl.h:2867
typename D::Half Half
Definition: ops/shared-inl.h:216
HWY_API Vec128< T, N > MinOfLanes(Simd< T, N, 0 >, const Vec128< T, N > v)
Definition: arm_neon-inl.h:4441
HWY_API Vec128< T, N > ShiftRightBytes(Simd< T, N, 0 >, Vec128< T, N > v)
Definition: arm_neon-inl.h:3114
typename D::template Repartition< T > Repartition
Definition: ops/shared-inl.h:207
HWY_API auto Ne(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5248
HWY_API Vec128< float, N > Sqrt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:1404
HWY_API Vec64< uint16_t > DemoteTo(Full64< uint16_t >, const Vec128< int32_t > v)
Definition: arm_neon-inl.h:2606
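DemoteTo narrows to a lane type of half the width, saturating where applicable; the destination tag is usually obtained via Rebind so the lane counts match. Sketch only: NarrowToU16 is a made-up name and count is assumed to be a multiple of Lanes(d32).

#include <cstdint>

#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

// Narrows int32 to uint16 with saturation.
void NarrowToU16(const int32_t* HWY_RESTRICT in, uint16_t* HWY_RESTRICT out,
                 size_t count) {
  const hn::ScalableTag<int32_t> d32;
  const hn::Rebind<uint16_t, hn::ScalableTag<int32_t>> d16;  // same lane count
  for (size_t i = 0; i < count; i += hn::Lanes(d32)) {
    hn::StoreU(hn::DemoteTo(d16, hn::LoadU(d32, in + i)), d16, out + i);
  }
}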
HWY_API Vec128< TI > TableLookupBytes(const Vec128< T > bytes, const Vec128< TI > from)
Definition: arm_neon-inl.h:4169
HWY_API size_t CompressStore(Vec128< T, N > v, const Mask128< T, N > mask, Simd< T, N, 0 > d, T *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:5052
HWY_API Vec128< uint32_t, N > RotateRight(const Vec128< uint32_t, N > v)
Definition: arm_neon-inl.h:935
HWY_API Vec128< T, N > AndNot(const Vec128< T, N > not_mask, const Vec128< T, N > mask)
Definition: arm_neon-inl.h:1455
HWY_API Vec128< T > ReverseBlocks(Full128< T >, const Vec128< T > v)
Definition: arm_neon-inl.h:4053
HWY_API V Div(V a, V b)
Definition: arm_neon-inl.h:5230
HWY_API Vec128< uint64_t > SumsOf8(const Vec128< uint8_t > v)
Definition: arm_neon-inl.h:852
HWY_API V Mul(V a, V b)
Definition: arm_neon-inl.h:5226
HWY_API Vec128< T, 1 > Reverse(Simd< T, 1, 0 >, const Vec128< T, 1 > v)
Definition: arm_neon-inl.h:3430
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2397
HWY_API Vec128< uint16_t > PromoteTo(Full128< uint16_t >, const Vec64< uint8_t > v)
Definition: arm_neon-inl.h:2426
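PromoteTo is the widening counterpart: load with the narrow tag, store with the wide one. Sketch only: WidenToU16 is a made-up name and count is assumed to be a multiple of Lanes(d16).

#include <cstdint>

#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

// Widens uint8 to uint16.
void WidenToU16(const uint8_t* HWY_RESTRICT in, uint16_t* HWY_RESTRICT out,
                size_t count) {
  const hn::ScalableTag<uint16_t> d16;
  const hn::Rebind<uint8_t, hn::ScalableTag<uint16_t>> d8;  // same lane count
  for (size_t i = 0; i < count; i += hn::Lanes(d16)) {
    hn::StoreU(hn::PromoteTo(d16, hn::LoadU(d8, in + i)), d16, out + i);
  }
}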
TFromD< DFromV< V > > TFromV
Definition: arm_neon-inl.h:558
HWY_API Vec64< uint8_t > UpperHalf(Full64< uint8_t >, const Vec128< uint8_t > v)
Definition: arm_neon-inl.h:3146
HWY_API Vec128< float > ApproximateReciprocalSqrt(const Vec128< float > v)
Definition: arm_neon-inl.h:1376
HWY_API V Trunc(const V v)
Definition: rvv-inl.h:2597
HWY_API Vec128< T, N > Compress(Vec128< T, N > v, const Mask128< T, N > mask)
Definition: arm_neon-inl.h:5031
HWY_API Vec128< T, N > ShiftRightLanes(Simd< T, N, 0 > d, const Vec128< T, N > v)
Definition: arm_neon-inl.h:3120
HWY_API V CombineShiftRightLanes(const D d, const V hi, V lo)
Definition: rvv-inl.h:2108
typename D::T TFromD
Definition: ops/shared-inl.h:192
HWY_API VI TableLookupBytesOr0(const V bytes, const VI from)
Definition: arm_neon-inl.h:4224
HWY_API Vec128< T, N > Or(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1477
HWY_API Vec128< float, N > NegMulSub(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > sub)
Definition: arm_neon-inl.h:1352
HWY_API Vec1< T > IfThenElse(const Mask1< T > mask, const Vec1< T > yes, const Vec1< T > no)
Definition: scalar-inl.h:278
constexpr T MantissaEnd()
Definition: base.h:570
typename EnableIfT< Condition >::type EnableIf
Definition: base.h:273
constexpr HWY_API bool IsSame()
Definition: base.h:286
constexpr size_t CeilLog2(TI x)
Definition: base.h:700
constexpr HWY_API bool IsSigned()
Definition: base.h:483
typename detail::Relations< T >::Unsigned MakeUnsigned
Definition: base.h:452
#define HWY_IF_LANE_SIZE_D(D, bytes)
Definition: ops/shared-inl.h:227
#define HWY_RVV_FOREACH_I163264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:314
#define HWY_RVV_GATHER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1364
#define HWY_RVV_PROMOTE_X2(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)
Definition: rvv-inl.h:1475
#define HWY_RVV_CAST_U8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:531
#define HWY_RVV_RETM_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:977
#define HWY_RVV_FOREACH_U163264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:309
#define HWY_RVV_UNDEFINED(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:472
#define HWY_RVV_MASKED_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1267
#define HWY_RVV_FOREACH_I16(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:269
#define HWY_RVV_SCATTER(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1335
#define HWY_RVV_FOREACH_F3264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:323
#define HWY_RVV_CAST_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:574
#define HWY_RVV_FOREACH_U64(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:263
#define HWY_RVV_FOREACH_F(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:340
#define HWY_RVV_BLENDED_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1293
#define HWY_RVV_PROMOTE_X4(OP, BASE, CHAR, BITS, BASE_IN, BITS_IN)
Definition: rvv-inl.h:1482
#define HWY_RVV_REDUCE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2378
#define HWY_RVV_CAST_VIRT_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:590
#define HWY_RVV_SLIDE1(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1849
#define HWY_RVV_NEAREST(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1704
#define HWY_RVV_STOREN(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1306
#define HWY_RVV_EXT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:503
#define HWY_RVV_FOREACH_UI16(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:293
#define HWY_RVV_SHIFT_II(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:869
#define HWY_RVV_FMA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:949
#define HWY_RVV_DEMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1533
#define HWY_RVV_LANES(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:379
#define HWY_RVV_PROMOTE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1466
#define HWY_RVV_SHIFT_VV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:860
#define HWY_RVV_SHIFT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:805
#define HWY_RVV_FOREACH_UI32(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:297
#define HWY_RVV_VEC_FROM_MASK(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1132
#define HWY_RVV_IF_POW2_IN(D, min, max)
Definition: rvv-inl.h:39
#define HWY_RVV_IF_THEN_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1080
#define HWY_RVV_TABLE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1938
#define HWY_RVV_FOREACH_B(X_MACRO, NAME, OP)
Definition: rvv-inl.h:59
#define HWY_RVV_RETV_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:417
#define HWY_RVV_FOREACH_F16(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:281
#define HWY_RVV_FOREACH_I32(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:271
#define HWY_RVV_FOREACH_UI163264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:319
#define HWY_RVV_RETM_ARGM(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1049
#define HWY_RVV_RETV_ARGV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:410
#define HWY_SPECIALIZE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:364
#define HWY_RVV_LOAD_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP)
#define HWY_RVV_ALL_TRUE(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1205
#define HWY_RVV_FOREACH_U08(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:257
#define HWY_RVV_SLIDE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1745
#define HWY_RVV_COMPRESS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:2064
#define HWY_RVV_FOREACH(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:349
#define HWY_RVV_SET(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:436
#define HWY_RVV_DEMOTE_F(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1638
#define HWY_RVV_TRUNC(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:494
#define HWY_RVV_FOREACH_F32(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:283
#define HWY_RVV_CAST_I8(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:545
#define HWY_RVV_RETM_ARGMM(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1061
#define HWY_RVV_FOREACH_I(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:334
#define HWY_RVV_LOAD(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1231
#define HWY_RVV_DEMOTE_I_TO_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1553
#define HWY_RVV_FOREACH_U32(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:261
#define HWY_RVV_IOTA(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:665
#define HWY_RVV_CAST_VIRT_IF(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:606
#define HWY_RVV_FOREACH_UI3264(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:305
#define HWY_RVV_IF_THEN_ZERO_ELSE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1100
#define HWY_RVV_STORE_MASK_BITS(SEW, SHIFT, MLEN, NAME, OP)
#define HWY_RVV_FIND_FIRST_TRUE(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1187
#define HWY_RVV_RETV_ARGV2(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
#define HWY_RVV_EXT_VIRT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:516
#define HWY_RVV_CAST_U(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:560
#define HWY_RVV_STORE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1280
#define HWY_RVV_GET_LANE(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1864
#define HWY_RVV_RETM_ARGVS(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:986
#define HWY_RVV_FOREACH_U(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:328
#define HWY_RVV_FOREACH_UI(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:345
#define HWY_RVV_CONVERT(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1686
#define HWY_RVV_RETV_ARGVV(BASE, CHAR, SEW, SEWD, SEWH, LMUL, LMULD, LMULH, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:425
#define HWY_RVV_FOREACH_U16(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:259
#define HWY_RVV_COUNT_TRUE(SEW, SHIFT, MLEN, NAME, OP)
Definition: rvv-inl.h:1217
#define HWY_RVV_FOREACH_I08(X_MACRO, NAME, OP, LMULS)
Definition: rvv-inl.h:267
#define HWY_NAMESPACE
Definition: set_macros-inl.h:80
value
Definition: arm_neon-inl.h:4798