Grok
9.5.0
src
lib
jp2
highway
hwy
ops
set_macros-inl.h
Go to the documentation of this file.
1
// Copyright 2020 Google LLC
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
// Sets macros based on HWY_TARGET.
16
17
// This include guard is toggled by foreach_target, so avoid the usual _H_
// suffix to prevent copybara from renaming it.
//
// The condition holds when HWY_SET_MACROS_PER_TARGET and HWY_TARGET_TOGGLE are
// either both defined or both undefined; in that case the state of
// HWY_SET_MACROS_PER_TARGET is flipped. The conditional closes immediately, so
// nothing that follows is skipped on repeated inclusion.
#if defined(HWY_SET_MACROS_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
#ifdef HWY_SET_MACROS_PER_TARGET
#undef HWY_SET_MACROS_PER_TARGET
#else
#define HWY_SET_MACROS_PER_TARGET
#endif

#endif  // HWY_SET_MACROS_PER_TARGET
27
28
#include "hwy/detect_targets.h"
29
30
// Discard definitions left over from a previous inclusion (possibly for a
// different HWY_TARGET) before the per-target section below redefines them.
#undef HWY_NAMESPACE
#undef HWY_ALIGN
#undef HWY_MAX_BYTES
#undef HWY_LANES

#undef HWY_CAP_INTEGER64
#undef HWY_CAP_FLOAT16
#undef HWY_CAP_FLOAT64
// Within this header only the SSSE3 section defines HWY_CAP_AES; clear it too
// so its value does not carry over into a later inclusion for another target.
#undef HWY_CAP_AES
#undef HWY_CAP_GE256
#undef HWY_CAP_GE512

#undef HWY_TARGET_STR
42
43
// Optional x86 feature groups. Each macro expands to a suffix for the target
// attribute string, or to "" when the matching HWY_DISABLE_* macro is defined.
#if defined(HWY_DISABLE_PCLMUL_AES)
#define HWY_TARGET_STR_PCLMUL_AES ""
#else
#define HWY_TARGET_STR_PCLMUL_AES ",pclmul,aes"
#endif

#if defined(HWY_DISABLE_BMI2_FMA)
#define HWY_TARGET_STR_BMI2_FMA ""
#else
#define HWY_TARGET_STR_BMI2_FMA ",bmi,bmi2,fma"
#endif

#if defined(HWY_DISABLE_F16C)
#define HWY_TARGET_STR_F16C ""
#else
#define HWY_TARGET_STR_F16C ",f16c"
#endif

// Cumulative feature lists, built by adjacent string-literal concatenation.
#define HWY_TARGET_STR_SSSE3 "sse2,ssse3"

#define HWY_TARGET_STR_SSE4 \
  HWY_TARGET_STR_SSSE3 ",sse4.1,sse4.2" HWY_TARGET_STR_PCLMUL_AES
// Include previous targets, which are the half-vectors of the next target.
#define HWY_TARGET_STR_AVX2 \
  HWY_TARGET_STR_SSE4 ",avx,avx2" HWY_TARGET_STR_BMI2_FMA HWY_TARGET_STR_F16C
#define HWY_TARGET_STR_AVX3 \
  HWY_TARGET_STR_AVX2 ",avx512f,avx512vl,avx512dq,avx512bw"
70
71
// Deliberately not wrapped by the include guard above, so HWY_TARGET_STR and
// the other per-target macros are redefined on each include, governed by the
// current HWY_TARGET.
//-----------------------------------------------------------------------------
// SSSE3
#if HWY_TARGET == HWY_SSSE3

#define HWY_NAMESPACE N_SSSE3
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_CAP_INTEGER64 1
#define HWY_CAP_FLOAT16 1
#define HWY_CAP_FLOAT64 1
#define HWY_CAP_AES 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_SSSE3
//-----------------------------------------------------------------------------
// SSE4
#elif HWY_TARGET == HWY_SSE4

#define HWY_NAMESPACE N_SSE4
#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_CAP_INTEGER64 1
#define HWY_CAP_FLOAT16 1
#define HWY_CAP_FLOAT64 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_SSE4

//-----------------------------------------------------------------------------
// AVX2
#elif HWY_TARGET == HWY_AVX2

#define HWY_NAMESPACE N_AVX2
#define HWY_ALIGN alignas(32)
#define HWY_MAX_BYTES 32
#define HWY_LANES(T) (32 / sizeof(T))

#define HWY_CAP_INTEGER64 1
#define HWY_CAP_FLOAT16 1
#define HWY_CAP_FLOAT64 1
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 0

#define HWY_TARGET_STR HWY_TARGET_STR_AVX2

//-----------------------------------------------------------------------------
// AVX3[_DL]
#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL

#define HWY_ALIGN alignas(64)
#define HWY_MAX_BYTES 64
#define HWY_LANES(T) (64 / sizeof(T))

#define HWY_CAP_INTEGER64 1
#define HWY_CAP_FLOAT16 1
#define HWY_CAP_FLOAT64 1
#define HWY_CAP_GE256 1
#define HWY_CAP_GE512 1

#if HWY_TARGET == HWY_AVX3

#define HWY_NAMESPACE N_AVX3
#define HWY_TARGET_STR HWY_TARGET_STR_AVX3

#elif HWY_TARGET == HWY_AVX3_DL

#define HWY_NAMESPACE N_AVX3_DL
// The AVX-512 VNNI extension is spelled "avx512vnni" in GCC/Clang target
// strings; "avxvnni" names the separate VEX-encoded AVX-VNNI extension.
#define HWY_TARGET_STR \
  HWY_TARGET_STR_AVX3  \
  ",vpclmulqdq,avx512vbmi2,vaes,avx512vnni,avx512bitalg,avx512vpopcntdq"

#else
#error "Logic error"
#endif  // HWY_TARGET == HWY_AVX3_DL

//-----------------------------------------------------------------------------
// PPC8
#elif HWY_TARGET == HWY_PPC8

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_CAP_INTEGER64 1
#define HWY_CAP_FLOAT16 0
#define HWY_CAP_FLOAT64 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_PPC8

#define HWY_TARGET_STR "altivec,vsx"

//-----------------------------------------------------------------------------
// NEON
#elif HWY_TARGET == HWY_NEON

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_CAP_INTEGER64 1
#define HWY_CAP_FLOAT16 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#if HWY_ARCH_ARM_A64
#define HWY_CAP_FLOAT64 1
#else
#define HWY_CAP_FLOAT64 0
#endif

#define HWY_NAMESPACE N_NEON

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

//-----------------------------------------------------------------------------
// SVE[2]
#elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE

#if defined(HWY_EMULATE_SVE) && !defined(__F16C__)
#error "Disable HWY_CAP_FLOAT16 or ensure farm_sve actually converts to f16"
#endif

// SVE only requires lane alignment, not natural alignment of the entire vector.
#define HWY_ALIGN alignas(8)

#define HWY_MAX_BYTES 256

// <= HWY_MAX_BYTES / sizeof(T): exact size. Otherwise a fraction 1/div (div =
// 1,2,4,8) is encoded as HWY_LANES(T) / div. This value leaves enough room for
// div=8 and demoting to 1/8 the lane width while still exceeding HWY_MAX_BYTES.
#define HWY_LANES(T) (32768 / sizeof(T))

#define HWY_CAP_INTEGER64 1
#define HWY_CAP_FLOAT16 1
#define HWY_CAP_FLOAT64 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#if HWY_TARGET == HWY_SVE2
#define HWY_NAMESPACE N_SVE2
#else
#define HWY_NAMESPACE N_SVE
#endif

// HWY_TARGET_STR remains undefined

//-----------------------------------------------------------------------------
// WASM
#elif HWY_TARGET == HWY_WASM

#define HWY_ALIGN alignas(16)
#define HWY_MAX_BYTES 16
#define HWY_LANES(T) (16 / sizeof(T))

#define HWY_CAP_INTEGER64 0
#define HWY_CAP_FLOAT16 1
#define HWY_CAP_FLOAT64 0
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_WASM

#define HWY_TARGET_STR "simd128"

//-----------------------------------------------------------------------------
// RVV
#elif HWY_TARGET == HWY_RVV

// RVV only requires lane alignment, not natural alignment of the entire vector,
// and the compiler already aligns builtin types, so nothing to do here.
#define HWY_ALIGN

// The spec requires VLEN <= 2^16 bits, so the limit is 2^16 bytes (LMUL=8).
#define HWY_MAX_BYTES 65536

// <= HWY_MAX_BYTES / sizeof(T): exact size. Otherwise a fraction 1/div (div =
// 1,2,4,8) is encoded as HWY_LANES(T) / div. This value leaves enough room for
// div=8 and demoting to 1/8 the lane width while still exceeding HWY_MAX_BYTES.
#define HWY_LANES(T) (8388608 / sizeof(T))

#define HWY_CAP_INTEGER64 1
#define HWY_CAP_FLOAT64 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#if defined(__riscv_zfh)
#define HWY_CAP_FLOAT16 1
#else
#define HWY_CAP_FLOAT16 0
#endif

#define HWY_NAMESPACE N_RVV

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
// (rv64gcv is not a valid target)

//-----------------------------------------------------------------------------
// SCALAR
#elif HWY_TARGET == HWY_SCALAR

#define HWY_ALIGN
#define HWY_MAX_BYTES 8
#define HWY_LANES(T) 1

#define HWY_CAP_INTEGER64 1
#define HWY_CAP_FLOAT16 1
#define HWY_CAP_FLOAT64 1
#define HWY_CAP_GE256 0
#define HWY_CAP_GE512 0

#define HWY_NAMESPACE N_SCALAR

// HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.

#else
#pragma message("HWY_TARGET does not match any known target")
#endif  // HWY_TARGET
298
299
// Clang <9 requires this be invoked at file scope, before any namespace.
// Expands to HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) when a target string exists.
// In both variants the trailing static_assert makes the invocation require a
// terminating semicolon.
#undef HWY_BEFORE_NAMESPACE
#if defined(HWY_TARGET_STR)
#define HWY_BEFORE_NAMESPACE()        \
  HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_BEFORE_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif
310
311
// Clang <9 requires any namespaces be closed before this macro.
// Expands to HWY_POP_ATTRIBUTES when a target string exists. In both variants
// the trailing static_assert makes the invocation require a terminating
// semicolon.
#undef HWY_AFTER_NAMESPACE
#if defined(HWY_TARGET_STR)
#define HWY_AFTER_NAMESPACE() \
  HWY_POP_ATTRIBUTES          \
  static_assert(true, "For requiring trailing semicolon")
#else
// avoids compiler warning if no HWY_TARGET_STR
#define HWY_AFTER_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#endif
322
323
// Per-function target attribute. Expands to __attribute__((target(...))) only
// when HWY_TARGET_STR is defined and the compiler reports support for the
// `target` attribute; otherwise it expands to nothing.
#undef HWY_ATTR
// HWY_HAS_ATTRIBUTE is queried in a nested conditional: a function-like macro
// call inside an #if expression is parsed even when the left operand of && is
// false, so targets without HWY_TARGET_STR must not depend on it being defined.
#if defined(HWY_TARGET_STR)
#if HWY_HAS_ATTRIBUTE(target)
#define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
#endif
#endif
#ifndef HWY_ATTR
#define HWY_ATTR
#endif
329
330
// DEPRECATED: forwards to HWY_LANES(T).
#undef HWY_GATHER_LANES
#define HWY_GATHER_LANES(T) HWY_LANES(T)
detect_targets.h
Generated by
1.9.1