Grok 10.0.0
set_macros-inl.h
Go to the documentation of this file.
1// Copyright 2020 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// Sets macros based on HWY_TARGET.
17
// Per-target include "guard". Unlike a conventional guard it encloses nothing:
// it only flips HWY_SET_MACROS_PER_TARGET (defined <-> undefined) whenever that
// macro's defined-ness equals that of HWY_TARGET_TOGGLE.
// This include guard is toggled by foreach_target, so avoid the usual _H_
// suffix to prevent copybara from renaming it.
#if defined(HWY_SET_MACROS_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
#ifdef HWY_SET_MACROS_PER_TARGET
#undef HWY_SET_MACROS_PER_TARGET
#else
#define HWY_SET_MACROS_PER_TARGET
#endif

#endif  // HWY_SET_MACROS_PER_TARGET toggle
28
29#include "hwy/detect_targets.h"
30
// Clear every per-target macro so that each (re-)inclusion starts from a clean
// slate; the HWY_TARGET-specific section further down defines them again.

// Vector geometry and namespace.
#undef HWY_NAMESPACE
#undef HWY_ALIGN
#undef HWY_MAX_BYTES
#undef HWY_LANES

// Capability flags.
#undef HWY_HAVE_SCALABLE
#undef HWY_HAVE_INTEGER64
#undef HWY_HAVE_FLOAT16
#undef HWY_HAVE_FLOAT64
#undef HWY_MEM_OPS_MIGHT_FAULT
#undef HWY_NATIVE_FMA
#undef HWY_CAP_GE256
#undef HWY_CAP_GE512

// Compiler target-attribute string.
#undef HWY_TARGET_STR
46
// Building blocks for the x86 HWY_TARGET_STR values. Each optional group
// expands to an empty string when the matching HWY_DISABLE_* macro is defined,
// so it can be concatenated unconditionally.

// ",pclmul,aes" unless HWY_DISABLE_PCLMUL_AES is defined.
#if defined(HWY_DISABLE_PCLMUL_AES)
#define HWY_TARGET_STR_PCLMUL_AES ""
#else
#define HWY_TARGET_STR_PCLMUL_AES ",pclmul,aes"
#endif

// ",bmi,bmi2,fma" unless HWY_DISABLE_BMI2_FMA is defined.
#if defined(HWY_DISABLE_BMI2_FMA)
#define HWY_TARGET_STR_BMI2_FMA ""
#else
#define HWY_TARGET_STR_BMI2_FMA ",bmi,bmi2,fma"
#endif

// ",f16c" unless HWY_DISABLE_F16C is defined.
#if defined(HWY_DISABLE_F16C)
#define HWY_TARGET_STR_F16C ""
#else
#define HWY_TARGET_STR_F16C ",f16c"
#endif

// Each string below is built by adjacent string-literal concatenation on top
// of the previous target's string.
#define HWY_TARGET_STR_SSSE3 "sse2,ssse3"

#define HWY_TARGET_STR_SSE4 \
  HWY_TARGET_STR_SSSE3 ",sse4.1,sse4.2" HWY_TARGET_STR_PCLMUL_AES
// Include previous targets, which are the half-vectors of the next target.
#define HWY_TARGET_STR_AVX2 \
  HWY_TARGET_STR_SSE4 ",avx,avx2" HWY_TARGET_STR_BMI2_FMA HWY_TARGET_STR_F16C
#define HWY_TARGET_STR_AVX3 \
  HWY_TARGET_STR_AVX2 ",avx512f,avx512vl,avx512dq,avx512bw"
74
// Not enclosed by the include guard above, so HWY_TARGET_STR and the other
// per-target macros are redefined on each include, governed by the current
// HWY_TARGET.
77
78//-----------------------------------------------------------------------------
79// SSSE3
80#if HWY_TARGET == HWY_SSSE3
81
82#define HWY_NAMESPACE N_SSSE3
83#define HWY_ALIGN alignas(16)
84#define HWY_MAX_BYTES 16
85#define HWY_LANES(T) (16 / sizeof(T))
86
87#define HWY_HAVE_SCALABLE 0
88#define HWY_HAVE_INTEGER64 1
89#define HWY_HAVE_FLOAT16 1
90#define HWY_HAVE_FLOAT64 1
91#define HWY_MEM_OPS_MIGHT_FAULT 1
92#define HWY_NATIVE_FMA 0
93#define HWY_CAP_GE256 0
94#define HWY_CAP_GE512 0
95
96#define HWY_TARGET_STR HWY_TARGET_STR_SSSE3
97
98//-----------------------------------------------------------------------------
99// SSE4
100#elif HWY_TARGET == HWY_SSE4
101
102#define HWY_NAMESPACE N_SSE4
103#define HWY_ALIGN alignas(16)
104#define HWY_MAX_BYTES 16
105#define HWY_LANES(T) (16 / sizeof(T))
106
107#define HWY_HAVE_SCALABLE 0
108#define HWY_HAVE_INTEGER64 1
109#define HWY_HAVE_FLOAT16 1
110#define HWY_HAVE_FLOAT64 1
111#define HWY_MEM_OPS_MIGHT_FAULT 1
112#define HWY_NATIVE_FMA 0
113#define HWY_CAP_GE256 0
114#define HWY_CAP_GE512 0
115
116#define HWY_TARGET_STR HWY_TARGET_STR_SSE4
117
118//-----------------------------------------------------------------------------
119// AVX2
120#elif HWY_TARGET == HWY_AVX2
121
122#define HWY_NAMESPACE N_AVX2
123#define HWY_ALIGN alignas(32)
124#define HWY_MAX_BYTES 32
125#define HWY_LANES(T) (32 / sizeof(T))
126
127#define HWY_HAVE_SCALABLE 0
128#define HWY_HAVE_INTEGER64 1
129#define HWY_HAVE_FLOAT16 1
130#define HWY_HAVE_FLOAT64 1
131#define HWY_MEM_OPS_MIGHT_FAULT 1
132
133#ifdef HWY_DISABLE_BMI2_FMA
134#define HWY_NATIVE_FMA 0
135#else
136#define HWY_NATIVE_FMA 1
137#endif
138
139#define HWY_CAP_GE256 1
140#define HWY_CAP_GE512 0
141
142#define HWY_TARGET_STR HWY_TARGET_STR_AVX2
143
144//-----------------------------------------------------------------------------
145// AVX3[_DL]
146#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL
147
148#define HWY_ALIGN alignas(64)
149#define HWY_MAX_BYTES 64
150#define HWY_LANES(T) (64 / sizeof(T))
151
152#define HWY_HAVE_SCALABLE 0
153#define HWY_HAVE_INTEGER64 1
154#define HWY_HAVE_FLOAT16 1
155#define HWY_HAVE_FLOAT64 1
156#define HWY_MEM_OPS_MIGHT_FAULT 0
157#define HWY_NATIVE_FMA 1
158#define HWY_CAP_GE256 1
159#define HWY_CAP_GE512 1
160
161#if HWY_TARGET == HWY_AVX3
162
163#define HWY_NAMESPACE N_AVX3
164#define HWY_TARGET_STR HWY_TARGET_STR_AVX3
165
166#elif HWY_TARGET == HWY_AVX3_DL
167
168#define HWY_NAMESPACE N_AVX3_DL
169#define HWY_TARGET_STR \
170 HWY_TARGET_STR_AVX3 \
171 ",vpclmulqdq,avx512vbmi,avx512vbmi2,vaes,avxvnni,avx512bitalg," \
172 "avx512vpopcntdq"
173
174#else
175#error "Logic error"
176#endif // HWY_TARGET == HWY_AVX3_DL
177
178//-----------------------------------------------------------------------------
179// PPC8
180#elif HWY_TARGET == HWY_PPC8
181
182#define HWY_ALIGN alignas(16)
183#define HWY_MAX_BYTES 16
184#define HWY_LANES(T) (16 / sizeof(T))
185
186#define HWY_HAVE_SCALABLE 0
187#define HWY_HAVE_INTEGER64 1
188#define HWY_HAVE_FLOAT16 0
189#define HWY_HAVE_FLOAT64 1
190#define HWY_MEM_OPS_MIGHT_FAULT 1
191#define HWY_NATIVE_FMA 1
192#define HWY_CAP_GE256 0
193#define HWY_CAP_GE512 0
194
195#define HWY_NAMESPACE N_PPC8
196
197#define HWY_TARGET_STR "altivec,vsx"
198
199//-----------------------------------------------------------------------------
200// NEON
201#elif HWY_TARGET == HWY_NEON
202
203#define HWY_ALIGN alignas(16)
204#define HWY_MAX_BYTES 16
205#define HWY_LANES(T) (16 / sizeof(T))
206
207#define HWY_HAVE_SCALABLE 0
208#define HWY_HAVE_INTEGER64 1
209#define HWY_HAVE_FLOAT16 1
210
211#if HWY_ARCH_ARM_A64
212#define HWY_HAVE_FLOAT64 1
213#else
214#define HWY_HAVE_FLOAT64 0
215#endif
216
217#define HWY_MEM_OPS_MIGHT_FAULT 1
218
219#if defined(__ARM_VFPV4__) || HWY_ARCH_ARM_A64
220#define HWY_NATIVE_FMA 1
221#else
222#define HWY_NATIVE_FMA 0
223#endif
224
225#define HWY_CAP_GE256 0
226#define HWY_CAP_GE512 0
227
228#define HWY_NAMESPACE N_NEON
229
230// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
231
232//-----------------------------------------------------------------------------
233// SVE[2]
234#elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE || \
235 HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
236
237// SVE only requires lane alignment, not natural alignment of the entire vector.
238#define HWY_ALIGN alignas(8)
239
240// Value ensures MaxLanes() is the tightest possible upper bound to reduce
241// overallocation.
242#define HWY_LANES(T) ((HWY_MAX_BYTES) / sizeof(T))
243
244#define HWY_HAVE_SCALABLE 1
245#define HWY_HAVE_INTEGER64 1
246#define HWY_HAVE_FLOAT16 1
247#define HWY_HAVE_FLOAT64 1
248#define HWY_MEM_OPS_MIGHT_FAULT 0
249#define HWY_NATIVE_FMA 1
250#define HWY_CAP_GE256 0
251#define HWY_CAP_GE512 0
252
253#if HWY_TARGET == HWY_SVE2
254#define HWY_NAMESPACE N_SVE2
255#define HWY_MAX_BYTES 256
256#elif HWY_TARGET == HWY_SVE_256
257#define HWY_NAMESPACE N_SVE_256
258#define HWY_MAX_BYTES 32
259#elif HWY_TARGET == HWY_SVE2_128
260#define HWY_NAMESPACE N_SVE2_128
261#define HWY_MAX_BYTES 16
262#else
263#define HWY_NAMESPACE N_SVE
264#define HWY_MAX_BYTES 256
265#endif
266
267// HWY_TARGET_STR remains undefined
268
269//-----------------------------------------------------------------------------
270// WASM
271#elif HWY_TARGET == HWY_WASM
272
273#define HWY_ALIGN alignas(16)
274#define HWY_MAX_BYTES 16
275#define HWY_LANES(T) (16 / sizeof(T))
276
277#define HWY_HAVE_SCALABLE 0
278#define HWY_HAVE_INTEGER64 1
279#define HWY_HAVE_FLOAT16 1
280#define HWY_HAVE_FLOAT64 0
281#define HWY_MEM_OPS_MIGHT_FAULT 1
282#define HWY_NATIVE_FMA 0
283#define HWY_CAP_GE256 0
284#define HWY_CAP_GE512 0
285
286#define HWY_NAMESPACE N_WASM
287
288#define HWY_TARGET_STR "simd128"
289
290//-----------------------------------------------------------------------------
291// WASM2
292#elif HWY_TARGET == HWY_WASM_EMU256
293
294#define HWY_ALIGN alignas(32)
295#define HWY_MAX_BYTES 32
296#define HWY_LANES(T) (32 / sizeof(T))
297
298#define HWY_HAVE_SCALABLE 0
299#define HWY_HAVE_INTEGER64 1
300#define HWY_HAVE_FLOAT16 1
301#define HWY_HAVE_FLOAT64 0
302#define HWY_MEM_OPS_MIGHT_FAULT 1
303#define HWY_NATIVE_FMA 0
304#define HWY_CAP_GE256 0
305#define HWY_CAP_GE512 0
306
307#define HWY_NAMESPACE N_WASM2
308
309#define HWY_TARGET_STR "simd128"
310
311//-----------------------------------------------------------------------------
312// RVV
313#elif HWY_TARGET == HWY_RVV
314
315// RVV only requires lane alignment, not natural alignment of the entire vector,
316// and the compiler already aligns builtin types, so nothing to do here.
317#define HWY_ALIGN
318
319// The spec requires VLEN <= 2^16 bits, so the limit is 2^16 bytes (LMUL=8).
320#define HWY_MAX_BYTES 65536
321
322// = HWY_MAX_BYTES divided by max LMUL=8 because MaxLanes includes the actual
323// LMUL. This is the tightest possible upper bound.
324#define HWY_LANES(T) (8192 / sizeof(T))
325
326#define HWY_HAVE_SCALABLE 1
327#define HWY_HAVE_INTEGER64 1
328#define HWY_HAVE_FLOAT64 1
329#define HWY_MEM_OPS_MIGHT_FAULT 0
330#define HWY_NATIVE_FMA 1
331#define HWY_CAP_GE256 0
332#define HWY_CAP_GE512 0
333
334#if defined(__riscv_zfh)
335#define HWY_HAVE_FLOAT16 1
336#else
337#define HWY_HAVE_FLOAT16 0
338#endif
339
340#define HWY_NAMESPACE N_RVV
341
342// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
343// (rv64gcv is not a valid target)
344
345//-----------------------------------------------------------------------------
346// EMU128
347#elif HWY_TARGET == HWY_EMU128
348
349#define HWY_ALIGN alignas(16)
350#define HWY_MAX_BYTES 16
351#define HWY_LANES(T) (16 / sizeof(T))
352
353#define HWY_HAVE_SCALABLE 0
354#define HWY_HAVE_INTEGER64 1
355#define HWY_HAVE_FLOAT16 1
356#define HWY_HAVE_FLOAT64 1
357#define HWY_MEM_OPS_MIGHT_FAULT 1
358#define HWY_NATIVE_FMA 0
359#define HWY_CAP_GE256 0
360#define HWY_CAP_GE512 0
361
362#define HWY_NAMESPACE N_EMU128
363
364// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
365
366//-----------------------------------------------------------------------------
367// SCALAR
368#elif HWY_TARGET == HWY_SCALAR
369
370#define HWY_ALIGN
371#define HWY_MAX_BYTES 8
372#define HWY_LANES(T) 1
373
374#define HWY_HAVE_SCALABLE 0
375#define HWY_HAVE_INTEGER64 1
376#define HWY_HAVE_FLOAT16 1
377#define HWY_HAVE_FLOAT64 1
378#define HWY_MEM_OPS_MIGHT_FAULT 0
379#define HWY_NATIVE_FMA 0
380#define HWY_CAP_GE256 0
381#define HWY_CAP_GE512 0
382
383#define HWY_NAMESPACE N_SCALAR
384
385// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
386
387#else
388#pragma message("HWY_TARGET does not match any known target")
389#endif // HWY_TARGET
390
// Override this to 1 in asan/msan builds, which will still fault. This replaces
// whatever value the per-target section chose for HWY_MEM_OPS_MIGHT_FAULT.
#if HWY_IS_ASAN || HWY_IS_MSAN
#undef HWY_MEM_OPS_MIGHT_FAULT
#define HWY_MEM_OPS_MIGHT_FAULT 1
#endif
396
// HWY_BEFORE_NAMESPACE(): when HWY_TARGET_STR is defined, pushes it via
// HWY_PUSH_ATTRIBUTES. Either way it ends in a static_assert so that the
// invocation must be followed by a semicolon.
// Clang <9 requires this be invoked at file scope, before any namespace.
#undef HWY_BEFORE_NAMESPACE
#if defined(HWY_TARGET_STR)
#define HWY_BEFORE_NAMESPACE()        \
  HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_BEFORE_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif
408
// HWY_AFTER_NAMESPACE(): when HWY_TARGET_STR is defined, expands
// HWY_POP_ATTRIBUTES. Either way it ends in a static_assert so that the
// invocation must be followed by a semicolon.
// Clang <9 requires any namespaces be closed before this macro.
#undef HWY_AFTER_NAMESPACE
#if defined(HWY_TARGET_STR)
#define HWY_AFTER_NAMESPACE() \
  HWY_POP_ATTRIBUTES          \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_AFTER_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif
420
421#undef HWY_ATTR
422#if defined(HWY_TARGET_STR) && HWY_HAS_ATTRIBUTE(target)
423#define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
424#else
425#define HWY_ATTR
426#endif