Grok 10.0.1
targets.h
Go to the documentation of this file.
1// Copyright 2020 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#ifndef HIGHWAY_HWY_TARGETS_H_
17#define HIGHWAY_HWY_TARGETS_H_
18
19#include <vector>
20
21// For SIMD module implementations and their callers. Defines which targets to
22// generate and call.
23
24#include "hwy/base.h"
25#include "hwy/detect_targets.h"
26#include "hwy/highway_export.h"
27
28#if !HWY_ARCH_RVV
29#include <atomic>
30#endif
31
32namespace hwy {
33
34// Returns bitfield of enabled targets that are supported on this CPU; there is
35// always at least one such target, hence the return value is never 0. The
36// targets returned may change after calling DisableTargets. This function is
37// always defined, but the HWY_SUPPORTED_TARGETS wrapper may allow eliding
38// calls to it if there is only a single target enabled.
40
41// Evaluates to a function call, or literal if there is a single target.
42#if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0
43#define HWY_SUPPORTED_TARGETS HWY_TARGETS
44#else
45#define HWY_SUPPORTED_TARGETS hwy::SupportedTargets()
46#endif
47
48// Subsequent SupportedTargets will not return targets whose bit(s) are set in
49// `disabled_targets`. Exception: if SupportedTargets would return 0, it will
50// instead return HWY_STATIC_TARGET (there must always be one target to call).
51//
52// This function is useful for disabling targets known to be buggy, or if the
53// best available target is undesirable (perhaps due to throttling or memory
54// bandwidth limitations). Use SetSupportedTargetsForTest instead of this
55// function for iteratively enabling specific targets for testing.
56HWY_DLLEXPORT void DisableTargets(uint32_t disabled_targets);
57
58// Subsequent SupportedTargets will return the given set of targets, except
59// those disabled via DisableTargets. Call with a mask of 0 to disable the mock
60// and return to the normal SupportedTargets behavior. Used to run tests for
61// all targets.
63
64// Return the list of targets in HWY_TARGETS supported by the CPU as a list of
65// individual HWY_* target macros such as HWY_SCALAR or HWY_NEON. This list
66// is affected by the current SetSupportedTargetsForTest() mock if any.
68 std::vector<uint32_t> ret;
69 for (uint32_t targets = SupportedTargets() & HWY_TARGETS; targets != 0;
70 targets = targets & (targets - 1)) {
71 uint32_t current_target = targets & ~(targets - 1);
72 ret.push_back(current_target);
73 }
74 return ret;
75}
76
77static inline HWY_MAYBE_UNUSED const char* TargetName(uint32_t target) {
78 switch (target) {
79#if HWY_ARCH_X86
80 case HWY_SSSE3:
81 return "SSSE3";
82 case HWY_SSE4:
83 return "SSE4";
84 case HWY_AVX2:
85 return "AVX2";
86 case HWY_AVX3:
87 return "AVX3";
88 case HWY_AVX3_DL:
89 return "AVX3_DL";
90#endif
91
92#if HWY_ARCH_ARM
93 case HWY_SVE2_128:
94 return "SVE2_128";
95 case HWY_SVE_256:
96 return "SVE_256";
97 case HWY_SVE2:
98 return "SVE2";
99 case HWY_SVE:
100 return "SVE";
101 case HWY_NEON:
102 return "Neon";
103#endif
104
105#if HWY_ARCH_PPC
106 case HWY_PPC8:
107 return "Power8";
108#endif
109
110#if HWY_ARCH_WASM
111 case HWY_WASM:
112 return "Wasm";
113 case HWY_WASM_EMU256:
114 return "Wasm2";
115#endif
116
117#if HWY_ARCH_RVV
118 case HWY_RVV:
119 return "RVV";
120#endif
121
122 case HWY_EMU128:
123 return "Emu128";
124 case HWY_SCALAR:
125 return "Scalar";
126
127 default:
128 return "Unknown"; // must satisfy gtest IsValidParamName()
129 }
130}
131
132// The maximum number of dynamic targets on any architecture is defined by
133// HWY_MAX_DYNAMIC_TARGETS and depends on the arch.
134
135// For the ChosenTarget mask and index we use a different bit arrangement than
136// in the HWY_TARGETS mask. Only the targets involved in the current
137// architecture are used in this mask, and therefore only the least significant
138// (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the uint32_t mask are used. The least
139// significant bit is set when the mask is not initialized, the next
140// HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the
141// HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to
142// that position and the next more significant bit is used for HWY_SCALAR (if
143// HWY_COMPILE_ONLY_SCALAR is defined) or HWY_EMU128. Because of this we need to
144// define equivalent values for HWY_TARGETS in this representation.
145// This mask representation allows using ctz() on the mask to obtain a small
146// number that serves as an index into the table for dynamic dispatch. In this
147// way, the first entry is used when the mask is uninitialized, the following
148// HWY_MAX_DYNAMIC_TARGETS entries are for dynamic dispatch, and the last one
149// is for scalar.
150
151// The HWY_SCALAR/HWY_EMU128 bit in the ChosenTarget mask format.
152#define HWY_CHOSEN_TARGET_MASK_SCALAR (1u << (HWY_MAX_DYNAMIC_TARGETS + 1))
153
154// Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
155// current architecture.
156#define HWY_CHOSEN_TARGET_SHIFT(X) \
157 ((((X) >> (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)) & \
158 ((1u << HWY_MAX_DYNAMIC_TARGETS) - 1)) \
159 << 1)
160
161// The HWY_TARGETS mask in the ChosenTarget mask format.
162#define HWY_CHOSEN_TARGET_MASK_TARGETS \
163 (HWY_CHOSEN_TARGET_SHIFT(HWY_TARGETS) | HWY_CHOSEN_TARGET_MASK_SCALAR | 1u)
164
165#if HWY_ARCH_X86
166// Maximum number of dynamic targets. Changing this value is an
167// ABI-incompatible change.
168#define HWY_MAX_DYNAMIC_TARGETS 10
169#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_X86
170// These must match the order in which the HWY_TARGETS are defined,
171// starting at the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
172// HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
173// HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry
174// corresponds to the best target. Do not put a "," at the end of the list.
175#define HWY_CHOOSE_TARGET_LIST(func_name) \
176 nullptr, /* reserved */ \
177 nullptr, /* reserved */ \
178 nullptr, /* reserved */ \
179 HWY_CHOOSE_AVX3_DL(func_name), /* AVX3_DL */ \
180 HWY_CHOOSE_AVX3(func_name), /* AVX3 */ \
181 HWY_CHOOSE_AVX2(func_name), /* AVX2 */ \
182 nullptr, /* AVX */ \
183 HWY_CHOOSE_SSE4(func_name), /* SSE4 */ \
184 HWY_CHOOSE_SSSE3(func_name), /* SSSE3 */ \
185 nullptr /* SSE3 or SSE2 */
186
187#elif HWY_ARCH_ARM
188// See HWY_ARCH_X86 above for details.
189#define HWY_MAX_DYNAMIC_TARGETS 8
190#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM
191#define HWY_CHOOSE_TARGET_LIST(func_name) \
192 nullptr, /* reserved */ \
193 nullptr, /* reserved */ \
194 HWY_CHOOSE_SVE2_128(func_name), /* SVE2 128-bit */ \
195 HWY_CHOOSE_SVE_256(func_name), /* SVE 256-bit */ \
196 HWY_CHOOSE_SVE2(func_name), /* SVE2 */ \
197 HWY_CHOOSE_SVE(func_name), /* SVE */ \
198 nullptr, /* reserved */ \
199 HWY_CHOOSE_NEON(func_name) /* NEON */
200
201#elif HWY_ARCH_PPC
202// See HWY_ARCH_X86 above for details.
203#define HWY_MAX_DYNAMIC_TARGETS 3
204#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC
205#define HWY_CHOOSE_TARGET_LIST(func_name) \
206 nullptr, /* reserved */ \
207 HWY_CHOOSE_PPC8(func_name), /* PPC8 */ \
208 nullptr /* VSX or AltiVec */
209
210#elif HWY_ARCH_WASM
211// See HWY_ARCH_X86 above for details.
212#define HWY_MAX_DYNAMIC_TARGETS 4
213#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM
214#define HWY_CHOOSE_TARGET_LIST(func_name) \
215 nullptr, /* reserved */ \
216 nullptr, /* reserved */ \
217 HWY_CHOOSE_WASM2(func_name), /* WASM2 */ \
218 HWY_CHOOSE_WASM(func_name) /* WASM */
219
220#elif HWY_ARCH_RVV
221// See HWY_ARCH_X86 above for details.
222#define HWY_MAX_DYNAMIC_TARGETS 4
223#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV
224#define HWY_CHOOSE_TARGET_LIST(func_name) \
225 nullptr, /* reserved */ \
226 nullptr, /* reserved */ \
227 nullptr, /* reserved */ \
228 HWY_CHOOSE_RVV(func_name) /* RVV */
229
230#else
231// Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
232// still creating single-entry tables in HWY_EXPORT to ensure portability.
233#define HWY_MAX_DYNAMIC_TARGETS 1
234#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR
235#endif
236
237// Bitfield of supported and enabled targets. The format differs from that of
238// HWY_TARGETS; the lowest bit governs the first function pointer (which is
239// special in that it calls FunctionCache, then Update, then dispatches to the
240// actual implementation) in the tables created by HWY_EXPORT. Monostate (see
241// GetChosenTarget), thread-safe except on RVV.
243 public:
244 // Reset bits according to `targets` (typically the return value of
245 // SupportedTargets()). Postcondition: IsInitialized() == true.
246 void Update(uint32_t targets) {
247 // These are `targets` shifted downwards, see above. Also include SCALAR
248 // (corresponds to the last entry in the function table) as fallback.
250 }
251
252 // Reset to the uninitialized state, so that FunctionCache will call Update
253 // during the next HWY_DYNAMIC_DISPATCH, and IsInitialized returns false.
254 void DeInit() { StoreMask(1); }
255
256 // Whether Update was called. This indicates whether any HWY_DYNAMIC_DISPATCH
257 // function was called, which we check in tests.
258 bool IsInitialized() const { return LoadMask() != 1; }
259
260 // Return the index in the dynamic dispatch table to be used by the current
261 // CPU. Note that this method must be in the header file so it uses the value
262 // of HWY_CHOSEN_TARGET_MASK_TARGETS defined in the translation unit that
263 // calls it, which may be different from others. This means we only enable
264 // those targets that were actually compiled in this module.
265 size_t HWY_INLINE GetIndex() const {
268 }
269
270 private:
271 // TODO(janwas): remove #if once <atomic> is available
272#if HWY_ARCH_RVV
273 uint32_t LoadMask() const { return mask_; }
274 void StoreMask(uint32_t mask) { mask_ = mask; }
275
276 uint32_t mask_{1}; // Initialized to 1 so GetIndex() returns 0.
277#else
278 uint32_t LoadMask() const { return mask_.load(); }
279 void StoreMask(uint32_t mask) { mask_.store(mask); }
280
281 std::atomic<uint32_t> mask_{1}; // Initialized to 1 so GetIndex() returns 0.
282#endif // HWY_ARCH_RVV
283};
284
285// For internal use (e.g. by FunctionCache and DisableTargets).
287
288} // namespace hwy
289
290#endif // HIGHWAY_HWY_TARGETS_H_
#define HWY_INLINE
Definition: base.h:62
#define HWY_MAYBE_UNUSED
Definition: base.h:73
#define HWY_WASM_EMU256
Definition: detect_targets.h:93
#define HWY_AVX3_DL
Definition: detect_targets.h:62
#define HWY_NEON
Definition: detect_targets.h:82
#define HWY_EMU128
Definition: detect_targets.h:103
#define HWY_PPC8
Definition: detect_targets.h:87
#define HWY_SVE2
Definition: detect_targets.h:79
#define HWY_AVX3
Definition: detect_targets.h:63
#define HWY_AVX2
Definition: detect_targets.h:64
#define HWY_SCALAR
Definition: detect_targets.h:104
#define HWY_SVE_256
Definition: detect_targets.h:78
#define HWY_SVE2_128
Definition: detect_targets.h:77
#define HWY_WASM
Definition: detect_targets.h:94
#define HWY_SVE
Definition: detect_targets.h:80
#define HWY_RVV
Definition: detect_targets.h:99
#define HWY_TARGETS
Definition: detect_targets.h:401
#define HWY_SSE4
Definition: detect_targets.h:66
#define HWY_SSSE3
Definition: detect_targets.h:67
#define HWY_DLLEXPORT
Definition: highway_export.h:13
Definition: aligned_allocator.h:27
static HWY_MAYBE_UNUSED const char * TargetName(uint32_t target)
Definition: targets.h:77
HWY_DLLEXPORT void SetSupportedTargetsForTest(uint32_t targets)
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x)
Definition: base.h:674
HWY_DLLEXPORT ChosenTarget & GetChosenTarget()
HWY_INLINE std::vector< uint32_t > SupportedAndGeneratedTargets()
Definition: targets.h:67
HWY_DLLEXPORT uint32_t SupportedTargets()
HWY_DLLEXPORT void DisableTargets(uint32_t disabled_targets)
Definition: targets.h:242
uint32_t LoadMask() const
Definition: targets.h:278
void StoreMask(uint32_t mask)
Definition: targets.h:279
bool IsInitialized() const
Definition: targets.h:258
size_t HWY_INLINE GetIndex() const
Definition: targets.h:265
void DeInit()
Definition: targets.h:254
void Update(uint32_t targets)
Definition: targets.h:246
std::atomic< uint32_t > mask_
Definition: targets.h:281
#define HWY_CHOSEN_TARGET_MASK_TARGETS
Definition: targets.h:162
#define HWY_CHOSEN_TARGET_SHIFT(X)
Definition: targets.h:156
#define HWY_CHOSEN_TARGET_MASK_SCALAR
Definition: targets.h:152