Grok  9.7.5
set_macros-inl.h
Go to the documentation of this file.
1 // Copyright 2020 Google LLC
2 // SPDX-License-Identifier: Apache-2.0
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 // Sets macros based on HWY_TARGET.
17 
18 // This include guard is toggled by foreach_target, so avoid the usual _H_
19 // suffix to prevent copybara from renaming it.
20 #if defined(HWY_SET_MACROS_PER_TARGET) == defined(HWY_TARGET_TOGGLE) // both defined, or both undefined
21 #ifdef HWY_SET_MACROS_PER_TARGET
22 #undef HWY_SET_MACROS_PER_TARGET // was defined: flip to undefined
23 #else
24 #define HWY_SET_MACROS_PER_TARGET // was undefined: flip to defined
25 #endif
26 
27 #endif // HWY_SET_MACROS_PER_TARGET (the guard wraps only the toggle above)
28 
29 #include "hwy/detect_targets.h"
30 
31 #undef HWY_NAMESPACE // Clear the previous target's values; the branch selected by HWY_TARGET redefines them.
32 #undef HWY_ALIGN
33 #undef HWY_MAX_BYTES
34 #undef HWY_LANES
35 
36 #undef HWY_HAVE_SCALABLE
37 #undef HWY_HAVE_INTEGER64
38 #undef HWY_HAVE_FLOAT16
39 #undef HWY_HAVE_FLOAT64
40 #undef HWY_MEM_OPS_MIGHT_FAULT
41 #undef HWY_CAP_GE256
42 #undef HWY_CAP_GE512
43 
44 #undef HWY_TARGET_STR // Stays undefined for targets that do not set it below.
45 
46 #if defined(HWY_DISABLE_PCLMUL_AES) // opt-out: leave pclmul/aes out of the x86 target strings
47 #define HWY_TARGET_STR_PCLMUL_AES ""
48 #else
49 #define HWY_TARGET_STR_PCLMUL_AES ",pclmul,aes"
50 #endif
51 
52 #if defined(HWY_DISABLE_BMI2_FMA) // opt-out: leave bmi/bmi2/fma out of the AVX2+ target strings
53 #define HWY_TARGET_STR_BMI2_FMA ""
54 #else
55 #define HWY_TARGET_STR_BMI2_FMA ",bmi,bmi2,fma"
56 #endif
57 
58 #if defined(HWY_DISABLE_F16C) // opt-out: leave f16c out of the AVX2+ target strings
59 #define HWY_TARGET_STR_F16C ""
60 #else
61 #define HWY_TARGET_STR_F16C ",f16c"
62 #endif
63 
64 #define HWY_TARGET_STR_SSSE3 "sse2,ssse3" // base list; the strings below extend it by adjacent-literal concatenation
65 
66 #define HWY_TARGET_STR_SSE4 \
67  HWY_TARGET_STR_SSSE3 ",sse4.1,sse4.2" HWY_TARGET_STR_PCLMUL_AES
68 // Each string includes the previous target's features; the previous targets
69 #define HWY_TARGET_STR_AVX2 \
70  HWY_TARGET_STR_SSE4 ",avx,avx2" HWY_TARGET_STR_BMI2_FMA HWY_TARGET_STR_F16C
71 #define HWY_TARGET_STR_AVX3 \
72  HWY_TARGET_STR_AVX2 ",avx512f,avx512vl,avx512dq,avx512bw"
73 
74 // Outside the include guard (which closes above) so we redefine HWY_TARGET_STR
75 // on each include, governed by the current HWY_TARGET.
76 //-----------------------------------------------------------------------------
77 // SSSE3: 16-byte vectors; the target string enables only sse2 and ssse3.
78 #if HWY_TARGET == HWY_SSSE3
79 
80 #define HWY_NAMESPACE N_SSSE3
81 #define HWY_ALIGN alignas(16)
82 #define HWY_MAX_BYTES 16
83 #define HWY_LANES(T) (16 / sizeof(T))
84 
85 #define HWY_HAVE_SCALABLE 0
86 #define HWY_HAVE_INTEGER64 1
87 #define HWY_HAVE_FLOAT16 1
88 #define HWY_HAVE_FLOAT64 1
89 #define HWY_MEM_OPS_MIGHT_FAULT 1
90 #define HWY_CAP_AES 0 // NOTE(review): only this branch defines HWY_CAP_AES and nothing in this file #undefs it, so it persists into later includes for other targets - confirm this is intended.
91 #define HWY_CAP_GE256 0
92 #define HWY_CAP_GE512 0
93 
94 #define HWY_TARGET_STR HWY_TARGET_STR_SSSE3
95 //-----------------------------------------------------------------------------
96 // SSE4: same 16-byte settings as SSSE3; the target string adds sse4.1/sse4.2 and, unless disabled, pclmul/aes.
97 #elif HWY_TARGET == HWY_SSE4
98 
99 #define HWY_NAMESPACE N_SSE4
100 #define HWY_ALIGN alignas(16)
101 #define HWY_MAX_BYTES 16
102 #define HWY_LANES(T) (16 / sizeof(T))
103 
104 #define HWY_HAVE_SCALABLE 0
105 #define HWY_HAVE_INTEGER64 1
106 #define HWY_HAVE_FLOAT16 1
107 #define HWY_HAVE_FLOAT64 1
108 #define HWY_MEM_OPS_MIGHT_FAULT 1
109 #define HWY_CAP_GE256 0
110 #define HWY_CAP_GE512 0
111 
112 #define HWY_TARGET_STR HWY_TARGET_STR_SSE4
113 
114 //-----------------------------------------------------------------------------
115 // AVX2: 32-byte vectors, so HWY_CAP_GE256 is 1; the target string extends the SSE4 one.
116 #elif HWY_TARGET == HWY_AVX2
117 
118 #define HWY_NAMESPACE N_AVX2
119 #define HWY_ALIGN alignas(32)
120 #define HWY_MAX_BYTES 32
121 #define HWY_LANES(T) (32 / sizeof(T))
122 
123 #define HWY_HAVE_SCALABLE 0
124 #define HWY_HAVE_INTEGER64 1
125 #define HWY_HAVE_FLOAT16 1
126 #define HWY_HAVE_FLOAT64 1
127 #define HWY_MEM_OPS_MIGHT_FAULT 1
128 #define HWY_CAP_GE256 1
129 #define HWY_CAP_GE512 0
130 
131 #define HWY_TARGET_STR HWY_TARGET_STR_AVX2
132 
133 //-----------------------------------------------------------------------------
134 // AVX3[_DL]: both targets share the 64-byte settings below; only the namespace and target string differ.
135 #elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL
136 
137 #define HWY_ALIGN alignas(64)
138 #define HWY_MAX_BYTES 64
139 #define HWY_LANES(T) (64 / sizeof(T))
140 
141 #define HWY_HAVE_SCALABLE 0
142 #define HWY_HAVE_INTEGER64 1
143 #define HWY_HAVE_FLOAT16 1
144 #define HWY_HAVE_FLOAT64 1
145 #define HWY_MEM_OPS_MIGHT_FAULT 0 // unlike the narrower x86 targets above, which set this to 1
146 #define HWY_CAP_GE256 1
147 #define HWY_CAP_GE512 1
148 
149 #if HWY_TARGET == HWY_AVX3 // plain AVX3: the shared AVX-512 feature list is sufficient
150 
151 #define HWY_NAMESPACE N_AVX3
152 #define HWY_TARGET_STR HWY_TARGET_STR_AVX3
153 
154 #elif HWY_TARGET == HWY_AVX3_DL // AVX3 plus extra extensions; VNNI must be the AVX-512 form (avx512vnni), not avxvnni, which names the separate VEX-encoded extension
155 
156 #define HWY_NAMESPACE N_AVX3_DL
157 #define HWY_TARGET_STR \
158  HWY_TARGET_STR_AVX3 \
159  ",vpclmulqdq,avx512vbmi2,vaes,avx512vnni,avx512bitalg,avx512vpopcntdq"
160 
161 #else
162 #error "Logic error"
163 #endif // HWY_TARGET == HWY_AVX3_DL
164 
165 //-----------------------------------------------------------------------------
166 // PPC8: 16-byte vectors; HWY_HAVE_FLOAT16 is 0 here; target string is altivec,vsx.
167 #elif HWY_TARGET == HWY_PPC8
168 
169 #define HWY_ALIGN alignas(16)
170 #define HWY_MAX_BYTES 16
171 #define HWY_LANES(T) (16 / sizeof(T))
172 
173 #define HWY_HAVE_SCALABLE 0
174 #define HWY_HAVE_INTEGER64 1
175 #define HWY_HAVE_FLOAT16 0
176 #define HWY_HAVE_FLOAT64 1
177 #define HWY_MEM_OPS_MIGHT_FAULT 1
178 #define HWY_CAP_GE256 0
179 #define HWY_CAP_GE512 0
180 
181 #define HWY_NAMESPACE N_PPC8
182 
183 #define HWY_TARGET_STR "altivec,vsx"
184 
185 //-----------------------------------------------------------------------------
186 // NEON: 16-byte vectors; HWY_HAVE_FLOAT64 is 1 only when HWY_ARCH_ARM_A64 is nonzero.
187 #elif HWY_TARGET == HWY_NEON
188 
189 #define HWY_ALIGN alignas(16)
190 #define HWY_MAX_BYTES 16
191 #define HWY_LANES(T) (16 / sizeof(T))
192 
193 #define HWY_HAVE_SCALABLE 0
194 #define HWY_HAVE_INTEGER64 1
195 #define HWY_HAVE_FLOAT16 1
196 #define HWY_CAP_GE256 0
197 #define HWY_CAP_GE512 0
198 
199 #if HWY_ARCH_ARM_A64
200 #define HWY_HAVE_FLOAT64 1
201 #else
202 #define HWY_HAVE_FLOAT64 0
203 #endif
204 #define HWY_MEM_OPS_MIGHT_FAULT 1
205 
206 #define HWY_NAMESPACE N_NEON
207 
208 // HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
209 
210 //-----------------------------------------------------------------------------
211 // SVE[2]: scalable vectors (HWY_HAVE_SCALABLE is 1); SVE and SVE2 differ only in namespace.
212 #elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE
213 
214 // SVE only requires lane alignment, not natural alignment of the entire vector.
215 #define HWY_ALIGN alignas(8)
216 
217 #define HWY_MAX_BYTES 256
218 
219 // Value ensures MaxLanes() is the tightest possible upper bound to reduce
220 // overallocation.
221 #define HWY_LANES(T) ((HWY_MAX_BYTES) / sizeof(T))
222 
223 #define HWY_HAVE_SCALABLE 1
224 #define HWY_HAVE_INTEGER64 1
225 #define HWY_HAVE_FLOAT16 1
226 #define HWY_HAVE_FLOAT64 1
227 #define HWY_MEM_OPS_MIGHT_FAULT 0
228 #define HWY_CAP_GE256 0
229 #define HWY_CAP_GE512 0
230 
231 #if HWY_TARGET == HWY_SVE2
232 #define HWY_NAMESPACE N_SVE2
233 #else
234 #define HWY_NAMESPACE N_SVE
235 #endif
236 
237 // HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
238 
239 //-----------------------------------------------------------------------------
240 // WASM: 16-byte vectors compiled for simd128; HWY_HAVE_FLOAT64 is 0.
241 #elif HWY_TARGET == HWY_WASM
242 
243 #define HWY_ALIGN alignas(16)
244 #define HWY_MAX_BYTES 16
245 #define HWY_LANES(T) (16 / sizeof(T))
246 
247 #define HWY_HAVE_SCALABLE 0
248 #define HWY_HAVE_INTEGER64 1
249 #define HWY_HAVE_FLOAT16 1
250 #define HWY_HAVE_FLOAT64 0
251 #define HWY_MEM_OPS_MIGHT_FAULT 1
252 #define HWY_CAP_GE256 0
253 #define HWY_CAP_GE512 0
254 
255 #define HWY_NAMESPACE N_WASM
256 
257 #define HWY_TARGET_STR "simd128"
258 
259 //-----------------------------------------------------------------------------
260 // WASM2: 32-byte vectors, still compiled for simd128; HWY_CAP_GE256 stays 0.
261 #elif HWY_TARGET == HWY_WASM2
262 
263 #define HWY_ALIGN alignas(32)
264 #define HWY_MAX_BYTES 32
265 #define HWY_LANES(T) (32 / sizeof(T))
266 
267 #define HWY_HAVE_SCALABLE 0
268 #define HWY_HAVE_INTEGER64 1
269 #define HWY_HAVE_FLOAT16 1
270 #define HWY_HAVE_FLOAT64 0
271 #define HWY_MEM_OPS_MIGHT_FAULT 1
272 #define HWY_CAP_GE256 0
273 #define HWY_CAP_GE512 0
274 
275 #define HWY_NAMESPACE N_WASM2
276 
277 #define HWY_TARGET_STR "simd128"
278 
279 //-----------------------------------------------------------------------------
280 // RVV: scalable vectors (HWY_HAVE_SCALABLE is 1); HWY_HAVE_FLOAT16 follows __riscv_zfh.
281 #elif HWY_TARGET == HWY_RVV
282 
283 // RVV only requires lane alignment, not natural alignment of the entire vector,
284 // and the compiler already aligns builtin types, so nothing to do here.
285 #define HWY_ALIGN
286 
287 // The spec requires VLEN <= 2^16 bits, so the limit is 2^16 bytes (LMUL=8).
288 #define HWY_MAX_BYTES 65536
289 
290 // = HWY_MAX_BYTES divided by max LMUL=8 because MaxLanes includes the actual
291 // LMUL. This is the tightest possible upper bound.
292 #define HWY_LANES(T) (8192 / sizeof(T))
293 
294 #define HWY_HAVE_SCALABLE 1
295 #define HWY_HAVE_INTEGER64 1
296 #define HWY_HAVE_FLOAT64 1
297 #define HWY_MEM_OPS_MIGHT_FAULT 0
298 #define HWY_CAP_GE256 0
299 #define HWY_CAP_GE512 0
300 
301 #if defined(__riscv_zfh) // float16 is reported only when the compiler defines __riscv_zfh
302 #define HWY_HAVE_FLOAT16 1
303 #else
304 #define HWY_HAVE_FLOAT16 0
305 #endif
306 
307 #define HWY_NAMESPACE N_RVV
308 
309 // HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
310 // (rv64gcv is not a valid target)
311 
312 //-----------------------------------------------------------------------------
313 // SCALAR: one lane per vector, no alignment attribute and no target string.
314 #elif HWY_TARGET == HWY_SCALAR
315 
316 #define HWY_ALIGN
317 #define HWY_MAX_BYTES 8
318 #define HWY_LANES(T) 1
319 
320 #define HWY_HAVE_SCALABLE 0
321 #define HWY_HAVE_INTEGER64 1
322 #define HWY_HAVE_FLOAT16 1
323 #define HWY_HAVE_FLOAT64 1
324 #define HWY_MEM_OPS_MIGHT_FAULT 0
325 #define HWY_CAP_GE256 0
326 #define HWY_CAP_GE512 0
327 
328 #define HWY_NAMESPACE N_SCALAR
329 
330 // HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
331 
332 #else // unknown HWY_TARGET: only a diagnostic is emitted and the per-target macros stay undefined
333 #pragma message("HWY_TARGET does not match any known target")
334 #endif // HWY_TARGET
335 
336 // Override HWY_MEM_OPS_MIGHT_FAULT to 1 in asan/msan builds, which will still fault.
337 #if HWY_IS_ASAN || HWY_IS_MSAN
338 #undef HWY_MEM_OPS_MIGHT_FAULT
339 #define HWY_MEM_OPS_MIGHT_FAULT 1
340 #endif
341 
342 // Clang <9 requires this be invoked at file scope, before any namespace.
343 #undef HWY_BEFORE_NAMESPACE
344 #if defined(HWY_TARGET_STR) // a target string exists: push it as attributes for the code that follows
345 #define HWY_BEFORE_NAMESPACE() \
346  HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
347  static_assert(true, "For requiring trailing semicolon")
348 #else
349 // No target string: skip the push (avoids a compiler warning) but still require the semicolon.
350 #define HWY_BEFORE_NAMESPACE() \
351  static_assert(true, "For requiring trailing semicolon")
352 #endif
353 
354 // Clang <9 requires any namespaces be closed before this macro.
355 #undef HWY_AFTER_NAMESPACE
356 #if defined(HWY_TARGET_STR) // mirrors HWY_BEFORE_NAMESPACE: pop only if attributes were pushed
357 #define HWY_AFTER_NAMESPACE() \
358  HWY_POP_ATTRIBUTES \
359  static_assert(true, "For requiring trailing semicolon")
360 #else
361 // No target string: nothing to pop (avoids a compiler warning) but still require the semicolon.
362 #define HWY_AFTER_NAMESPACE() \
363  static_assert(true, "For requiring trailing semicolon")
364 #endif
365 
366 #undef HWY_ATTR
367 #if defined(HWY_TARGET_STR) && HWY_HAS_ATTRIBUTE(target) // per-function target attribute, only when a target string exists and the attribute is supported
368 #define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
369 #else
370 #define HWY_ATTR // expands to nothing otherwise
371 #endif