Grok  9.5.0
targets.h
Go to the documentation of this file.
1 // Copyright 2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef HIGHWAY_HWY_TARGETS_H_
16 #define HIGHWAY_HWY_TARGETS_H_
17 
18 #include <vector>
19 
20 // For SIMD module implementations and their callers. Defines which targets to
21 // generate and call.
22 
23 #include "hwy/base.h"
24 #include "hwy/detect_targets.h"
25 
26 namespace hwy {
27 
28 // Returns (cached) bitfield of enabled targets that are supported on this CPU.
29 // Implemented in targets.cc; unconditionally compiled to support the use case
30 // of binary-only distributions. The HWY_SUPPORTED_TARGETS wrapper may allow
31 // eliding calls to this function.
32 uint32_t SupportedTargets();
33 
34 // Evaluates to a function call, or literal if there is a single target.
35 #if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0
36 #define HWY_SUPPORTED_TARGETS HWY_TARGETS
37 #else
38 #define HWY_SUPPORTED_TARGETS hwy::SupportedTargets()
39 #endif
40 
41 // Removes the given mask of compiled-in targets from runtime dispatch. Targets that
42 // were not enabled at compile time are ignored. This function is useful to
43 // disable a target supported by the CPU that is known to have bugs or when a
44 // lower target is desired. For this reason, attempts to disable targets which
45 // are in HWY_ENABLED_BASELINE have no effect so SupportedTargets() always
46 // returns at least the baseline target.
47 void DisableTargets(uint32_t disabled_targets);
48 
49 // Set the mock mask of CPU supported targets instead of the actual CPU
50 // supported targets computed in SupportedTargets(). The return value of
51 // SupportedTargets() will still be affected by the DisableTargets() mask
52 // regardless of this mock, to prevent accidentally adding targets that are
53 // known to be buggy in the current CPU. Call with a mask of 0 to disable the
54 // mock and use the actual CPU supported targets instead.
55 void SetSupportedTargetsForTest(uint32_t targets);
56 
// Returns whether the SupportedTargets() function was called since the last
// SetSupportedTargetsForTest() call.
bool SupportedTargetsCalledForTest();
60 
61 // Return the list of targets in HWY_TARGETS supported by the CPU as a list of
62 // individual HWY_* target macros such as HWY_SCALAR or HWY_NEON. This list
63 // is affected by the current SetSupportedTargetsForTest() mock if any.
64 HWY_INLINE std::vector<uint32_t> SupportedAndGeneratedTargets() {
65  std::vector<uint32_t> ret;
66  for (uint32_t targets = SupportedTargets() & HWY_TARGETS; targets != 0;
67  targets = targets & (targets - 1)) {
68  uint32_t current_target = targets & ~(targets - 1);
69  ret.push_back(current_target);
70  }
71  return ret;
72 }
73 
74 static inline HWY_MAYBE_UNUSED const char* TargetName(uint32_t target) {
75  switch (target) {
76 #if HWY_ARCH_X86
77  case HWY_SSSE3:
78  return "SSSE3";
79  case HWY_SSE4:
80  return "SSE4";
81  case HWY_AVX2:
82  return "AVX2";
83  case HWY_AVX3:
84  return "AVX3";
85  case HWY_AVX3_DL:
86  return "AVX3_DL";
87 #endif
88 
89 #if HWY_ARCH_ARM
90  case HWY_SVE2:
91  return "SVE2";
92  case HWY_SVE:
93  return "SVE";
94  case HWY_NEON:
95  return "Neon";
96 #endif
97 
98 #if HWY_ARCH_PPC
99  case HWY_PPC8:
100  return "Power8";
101 #endif
102 
103 #if HWY_ARCH_WASM
104  case HWY_WASM:
105  return "Wasm";
106 #endif
107 
108 #if HWY_ARCH_RVV
109  case HWY_RVV:
110  return "RVV";
111 #endif
112 
113  case HWY_SCALAR:
114  return "Scalar";
115 
116  default:
117  return "Unknown"; // must satisfy gtest IsValidParamName()
118  }
119 }
120 
121 // The maximum number of dynamic targets on any architecture is defined by
122 // HWY_MAX_DYNAMIC_TARGETS and depends on the arch.
123 
124 // For the ChosenTarget mask and index we use a different bit arrangement than
125 // in the HWY_TARGETS mask. Only the targets involved in the current
126 // architecture are used in this mask, and therefore only the least significant
127 // (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the uint32_t mask are used. The least
128 // significant bit is set when the mask is not initialized, the next
129 // HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the
130 // HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to
131 // that position and the next more significant bit is used for the scalar
132 // target. Because of this we need to define equivalent values for HWY_TARGETS
133 // in this representation.
134 // This mask representation makes it possible to use ctz() on the mask and obtain a small
135 // number that's used as an index of the table for dynamic dispatch. In this
136 // way the first entry is used when the mask is uninitialized, the following
137 // HWY_MAX_DYNAMIC_TARGETS are for dynamic dispatch and the last one is for
138 // scalar.
139 
140 // The HWY_SCALAR bit in the ChosenTarget mask format.
// It is placed directly above the HWY_MAX_DYNAMIC_TARGETS dynamic-target bits,
// which themselves start at bit 1 (bit 0 is left for the "1u" flag OR-ed into
// HWY_CHOSEN_TARGET_MASK_TARGETS below).
141 #define HWY_CHOSEN_TARGET_MASK_SCALAR (1u << (HWY_MAX_DYNAMIC_TARGETS + 1))
142 
143 // Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the
144 // current architecture.
// Steps, as written in the expression:
//   1. shift X right by (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)
//      so the lowest bit of the architecture's target range lands at bit 0;
//   2. keep only the low HWY_MAX_DYNAMIC_TARGETS bits;
//   3. shift left by one so that bit 0 stays free.
// HWY_HIGHEST_TARGET_BIT and HWY_MAX_DYNAMIC_TARGETS are defined per
// architecture further down; that is fine because macros are expanded at the
// point of use, not at the point of definition.
145 #define HWY_CHOSEN_TARGET_SHIFT(X) \
146  ((((X) >> (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)) & \
147  ((1u << HWY_MAX_DYNAMIC_TARGETS) - 1)) \
148  << 1)
149 
150 // The HWY_TARGETS mask in the ChosenTarget mask format.
// Bit 0 and the scalar bit are always OR-ed in, so this mask is never zero.
151 #define HWY_CHOSEN_TARGET_MASK_TARGETS \
152  (HWY_CHOSEN_TARGET_SHIFT(HWY_TARGETS) | HWY_CHOSEN_TARGET_MASK_SCALAR | 1u)
153 
// Per-architecture dispatch-table configuration. Exactly one branch is taken.
// Every branch defines HWY_MAX_DYNAMIC_TARGETS and HWY_HIGHEST_TARGET_BIT; all
// branches except the final #else also define HWY_CHOOSE_TARGET_LIST, whose
// entry count equals that branch's HWY_MAX_DYNAMIC_TARGETS (10 for x86, 4 for
// Arm, 5 for PPC, 4 for Wasm, 4 for RVV). nullptr entries fill slots that have
// no HWY_CHOOSE_* macro (marked "reserved" or named after a target that has no
// such macro here).
154 #if HWY_ARCH_X86
155 // Maximum number of dynamic targets, changing this value is an ABI incompatible
156 // change
157 #define HWY_MAX_DYNAMIC_TARGETS 10
158 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_X86
159 // These must match the order in which the HWY_TARGETS are defined
160 // starting by the least significant (HWY_HIGHEST_TARGET_BIT + 1 -
161 // HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly
162 // HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry
163 // corresponds to the best target. Don't include a "," at the end of the list.
164 #define HWY_CHOOSE_TARGET_LIST(func_name) \
165  nullptr, /* reserved */ \
166  nullptr, /* reserved */ \
167  HWY_CHOOSE_AVX3_DL(func_name), /* AVX3_DL */ \
168  HWY_CHOOSE_AVX3(func_name), /* AVX3 */ \
169  HWY_CHOOSE_AVX2(func_name), /* AVX2 */ \
170  nullptr, /* AVX */ \
171  HWY_CHOOSE_SSE4(func_name), /* SSE4 */ \
172  HWY_CHOOSE_SSSE3(func_name), /* SSSE3 */ \
173  nullptr, /* SSE3 */ \
174  nullptr /* SSE2 */
175 
176 #elif HWY_ARCH_ARM
177 // See HWY_ARCH_X86 above for details.
178 #define HWY_MAX_DYNAMIC_TARGETS 4
179 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM
180 #define HWY_CHOOSE_TARGET_LIST(func_name) \
181  HWY_CHOOSE_SVE2(func_name), /* SVE2 */ \
182  HWY_CHOOSE_SVE(func_name), /* SVE */ \
183  nullptr, /* reserved */ \
184  HWY_CHOOSE_NEON(func_name) /* NEON */
185 
186 #elif HWY_ARCH_PPC
187 // See HWY_ARCH_X86 above for details.
188 #define HWY_MAX_DYNAMIC_TARGETS 5
189 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC
190 #define HWY_CHOOSE_TARGET_LIST(func_name) \
191  nullptr, /* reserved */ \
192  nullptr, /* reserved */ \
193  HWY_CHOOSE_PPC8(func_name), /* PPC8 */ \
194  nullptr, /* VSX */ \
195  nullptr /* AltiVec */
196 
197 #elif HWY_ARCH_WASM
198 // See HWY_ARCH_X86 above for details.
199 #define HWY_MAX_DYNAMIC_TARGETS 4
200 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM
201 #define HWY_CHOOSE_TARGET_LIST(func_name) \
202  nullptr, /* reserved */ \
203  nullptr, /* reserved */ \
204  nullptr, /* reserved */ \
205  HWY_CHOOSE_WASM(func_name) /* WASM */
206 
207 #elif HWY_ARCH_RVV
208 // See HWY_ARCH_X86 above for details.
209 #define HWY_MAX_DYNAMIC_TARGETS 4
210 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV
211 #define HWY_CHOOSE_TARGET_LIST(func_name) \
212  nullptr, /* reserved */ \
213  nullptr, /* reserved */ \
214  nullptr, /* reserved */ \
215  HWY_CHOOSE_RVV(func_name) /* RVV */
216 
217 #else
218 // Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though
219 // still creating single-entry tables in HWY_EXPORT to ensure portability.
// Note: this branch intentionally leaves HWY_CHOOSE_TARGET_LIST undefined.
220 #define HWY_MAX_DYNAMIC_TARGETS 1
221 #define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR
222 #endif
223 
224 struct ChosenTarget {
225  public:
226  // Update the ChosenTarget mask based on the current CPU supported
227  // targets.
228  void Update();
229 
230  // Reset the ChosenTarget to the uninitialized state.
231  void DeInit() { mask_.store(1); }
232 
233  // Whether the ChosenTarget was initialized. This is useful to know whether
234  // any HWY_DYNAMIC_DISPATCH function was called.
235  bool IsInitialized() const { return mask_.load() != 1; }
236 
237  // Return the index in the dynamic dispatch table to be used by the current
238  // CPU. Note that this method must be in the header file so it uses the value
239  // of HWY_CHOSEN_TARGET_MASK_TARGETS defined in the translation unit that
240  // calls it, which may be different from others. This allows to only consider
241  // those targets that were actually compiled in this module.
242  size_t HWY_INLINE GetIndex() const {
245  }
246 
247  private:
248  // Initialized to 1 so GetChosenTargetIndex() returns 0.
249  std::atomic<uint32_t> mask_{1};
250 };
251 
252 extern ChosenTarget chosen_target;
253 
254 } // namespace hwy
255 
256 #endif // HIGHWAY_HWY_TARGETS_H_
#define HWY_INLINE
Definition: base.h:59
#define HWY_MAYBE_UNUSED
Definition: base.h:70
#define HWY_AVX3_DL
Definition: detect_targets.h:57
#define HWY_NEON
Definition: detect_targets.h:74
#define HWY_PPC8
Definition: detect_targets.h:79
#define HWY_SVE2
Definition: detect_targets.h:71
#define HWY_AVX3
Definition: detect_targets.h:58
#define HWY_AVX2
Definition: detect_targets.h:59
#define HWY_SCALAR
Definition: detect_targets.h:97
#define HWY_WASM
Definition: detect_targets.h:85
#define HWY_SVE
Definition: detect_targets.h:72
#define HWY_RVV
Definition: detect_targets.h:91
#define HWY_TARGETS
Definition: detect_targets.h:377
#define HWY_SSE4
Definition: detect_targets.h:61
#define HWY_SSSE3
Definition: detect_targets.h:62
Definition: aligned_allocator.h:23
uint32_t SupportedTargets()
void SetSupportedTargetsForTest(uint32_t targets)
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x)
Definition: base.h:555
static HWY_MAYBE_UNUSED const char * TargetName(uint32_t target)
Definition: targets.h:74
HWY_INLINE std::vector< uint32_t > SupportedAndGeneratedTargets()
Definition: targets.h:64
bool SupportedTargetsCalledForTest()
ChosenTarget chosen_target
void DisableTargets(uint32_t disabled_targets)
Definition: targets.h:224
bool IsInitialized() const
Definition: targets.h:235
size_t HWY_INLINE GetIndex() const
Definition: targets.h:242
void DeInit()
Definition: targets.h:231
std::atomic< uint32_t > mask_
Definition: targets.h:249
#define HWY_CHOSEN_TARGET_MASK_TARGETS
Definition: targets.h:151