Grok  9.5.0
highway.h
Go to the documentation of this file.
1 // Copyright 2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // This include guard is checked by foreach_target, so avoid the usual _H_
16 // suffix to prevent copybara from renaming it. NOTE: ops/*-inl.h are included
17 // after/outside this include guard.
18 #ifndef HWY_HIGHWAY_INCLUDED
19 #define HWY_HIGHWAY_INCLUDED
20 
21 // Main header required before using vector types.
22 
23 #include "hwy/base.h"
24 #include "hwy/targets.h"
25 
26 namespace hwy {
27 
28 // API version (https://semver.org/); keep in sync with CMakeLists.txt.
29 #define HWY_MAJOR 0
30 #define HWY_MINOR 14
31 #define HWY_PATCH 2
32 
33 //------------------------------------------------------------------------------
34 // Shorthand for descriptors (defined in shared-inl.h) used to select overloads.
35 
36 // HWY_FULL(T[,LMUL=1]) is a native vector/group. LMUL is the number of
37 // registers in the group, and is ignored on targets that do not support groups.
38 #define HWY_FULL1(T) hwy::HWY_NAMESPACE::Simd<T, HWY_LANES(T)>
39 #define HWY_3TH_ARG(arg1, arg2, arg3, ...) arg3
40 // Workaround for MSVC grouping __VA_ARGS__ into a single argument
41 #define HWY_FULL_RECOMPOSER(args_with_paren) HWY_3TH_ARG args_with_paren
42 // Trailing comma avoids -pedantic false alarm
43 #define HWY_CHOOSE_FULL(...) \
44  HWY_FULL_RECOMPOSER((__VA_ARGS__, HWY_FULL2, HWY_FULL1, ))
45 #define HWY_FULL(...) HWY_CHOOSE_FULL(__VA_ARGS__())(__VA_ARGS__)
46 
47 // Vector of up to MAX_N lanes. Discouraged: when possible, use Half<> instead.
48 #define HWY_CAPPED(T, MAX_N) \
49  hwy::HWY_NAMESPACE::Simd<T, HWY_MIN(MAX_N, HWY_LANES(T))>
50 
51 //------------------------------------------------------------------------------
52 // Export user functions for static/dynamic dispatch
53 
54 // Evaluates to 0 inside a translation unit if it is generating anything but the
55 // static target (the last one if multiple targets are enabled). Used to prevent
56 // redefinitions of HWY_EXPORT. Unless foreach_target.h is included, we only
57 // compile once anyway, so this is 1 unless it is or has been included.
58 #ifndef HWY_ONCE
59 #define HWY_ONCE 1
60 #endif
61 
62 // HWY_STATIC_DISPATCH(FUNC_NAME) is the namespace-qualified FUNC_NAME for
63 // HWY_STATIC_TARGET (the only defined namespace unless HWY_TARGET_INCLUDE is
64 // defined), and can be used to deduce the return type of Choose*.
65 #if HWY_STATIC_TARGET == HWY_SCALAR
66 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SCALAR::FUNC_NAME
67 #elif HWY_STATIC_TARGET == HWY_RVV
68 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_RVV::FUNC_NAME
69 #elif HWY_STATIC_TARGET == HWY_WASM
70 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM::FUNC_NAME
71 #elif HWY_STATIC_TARGET == HWY_NEON
72 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME
73 #elif HWY_STATIC_TARGET == HWY_SVE
74 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE::FUNC_NAME
75 #elif HWY_STATIC_TARGET == HWY_SVE2
76 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2::FUNC_NAME
77 #elif HWY_STATIC_TARGET == HWY_PPC8
78 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC8::FUNC_NAME
79 #elif HWY_STATIC_TARGET == HWY_SSSE3
80 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSSE3::FUNC_NAME
81 #elif HWY_STATIC_TARGET == HWY_SSE4
82 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE4::FUNC_NAME
83 #elif HWY_STATIC_TARGET == HWY_AVX2
84 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX2::FUNC_NAME
85 #elif HWY_STATIC_TARGET == HWY_AVX3
86 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3::FUNC_NAME
87 #elif HWY_STATIC_TARGET == HWY_AVX3_DL
88 #define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_DL::FUNC_NAME
89 #endif
90 
91 // Dynamic dispatch declarations.
92 
93 template <typename RetType, typename... Args>
94 struct FunctionCache {
95  public:
96  typedef RetType(FunctionType)(Args...);
97 
98  // A template function that when instantiated has the same signature as the
99  // function being called. This function initializes the global cache of the
100  // current supported targets mask used for dynamic dispatch and calls the
101  // appropriate function. Since this mask used for dynamic dispatch is a
102  // global cache, all the highway exported functions, even those exposed by
103  // different modules, will be initialized after this function runs for any one
104  // of those exported functions.
105  template <FunctionType* const table[]>
106  static RetType ChooseAndCall(Args... args) {
107  // If we are running here it means we need to update the chosen target.
108  chosen_target.Update();
109  return (table[chosen_target.GetIndex()])(args...);
110  }
111 };
112 
113 // Deduction-only factory: given a function pointer, yields the FunctionCache
114 // whose RetType and Args match that function's signature.
115 template <typename RetType, typename... Args>
116 FunctionCache<RetType, Args...> FunctionCacheFactory(RetType (*)(Args...)) {
117  return {};
118 }
119 
120 // HWY_CHOOSE_*(FUNC_NAME) expands to the function pointer for that target or
121 // nullptr if that target was not compiled.
122 #if HWY_TARGETS & HWY_SCALAR
123 #define HWY_CHOOSE_SCALAR(FUNC_NAME) &N_SCALAR::FUNC_NAME
124 #else
125 // When scalar is not present and we try to use scalar because other targets
126 // were disabled at runtime we fall back to the baseline with
127 // HWY_STATIC_DISPATCH()
128 #define HWY_CHOOSE_SCALAR(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
129 #endif
130 
131 #if HWY_TARGETS & HWY_WASM
132 #define HWY_CHOOSE_WASM(FUNC_NAME) &N_WASM::FUNC_NAME
133 #else
134 #define HWY_CHOOSE_WASM(FUNC_NAME) nullptr
135 #endif
136 
137 #if HWY_TARGETS & HWY_RVV
138 #define HWY_CHOOSE_RVV(FUNC_NAME) &N_RVV::FUNC_NAME
139 #else
140 #define HWY_CHOOSE_RVV(FUNC_NAME) nullptr
141 #endif
142 
143 #if HWY_TARGETS & HWY_NEON
144 #define HWY_CHOOSE_NEON(FUNC_NAME) &N_NEON::FUNC_NAME
145 #else
146 #define HWY_CHOOSE_NEON(FUNC_NAME) nullptr
147 #endif
148 
149 #if HWY_TARGETS & HWY_SVE
150 #define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME
151 #else
152 #define HWY_CHOOSE_SVE(FUNC_NAME) nullptr
153 #endif
154 
155 #if HWY_TARGETS & HWY_SVE2
156 #define HWY_CHOOSE_SVE2(FUNC_NAME) &N_SVE2::FUNC_NAME
157 #else
158 #define HWY_CHOOSE_SVE2(FUNC_NAME) nullptr
159 #endif
160 
161 #if HWY_TARGETS & HWY_PPC8
162 #define HWY_CHOOSE_PPC8(FUNC_NAME) &N_PPC8::FUNC_NAME
163 #else
164 #define HWY_CHOOSE_PPC8(FUNC_NAME) nullptr
165 #endif  // HWY_TARGETS & HWY_PPC8
166 
167 #if HWY_TARGETS & HWY_SSSE3
168 #define HWY_CHOOSE_SSSE3(FUNC_NAME) &N_SSSE3::FUNC_NAME
169 #else
170 #define HWY_CHOOSE_SSSE3(FUNC_NAME) nullptr
171 #endif
172 
173 #if HWY_TARGETS & HWY_SSE4
174 #define HWY_CHOOSE_SSE4(FUNC_NAME) &N_SSE4::FUNC_NAME
175 #else
176 #define HWY_CHOOSE_SSE4(FUNC_NAME) nullptr
177 #endif
178 
179 #if HWY_TARGETS & HWY_AVX2
180 #define HWY_CHOOSE_AVX2(FUNC_NAME) &N_AVX2::FUNC_NAME
181 #else
182 #define HWY_CHOOSE_AVX2(FUNC_NAME) nullptr
183 #endif
184 
185 #if HWY_TARGETS & HWY_AVX3
186 #define HWY_CHOOSE_AVX3(FUNC_NAME) &N_AVX3::FUNC_NAME
187 #else
188 #define HWY_CHOOSE_AVX3(FUNC_NAME) nullptr
189 #endif
190 
191 #if HWY_TARGETS & HWY_AVX3_DL
192 #define HWY_CHOOSE_AVX3_DL(FUNC_NAME) &N_AVX3_DL::FUNC_NAME
193 #else
194 #define HWY_CHOOSE_AVX3_DL(FUNC_NAME) nullptr
195 #endif
196 
197 #define HWY_DISPATCH_TABLE(FUNC_NAME) \
198  HWY_CONCAT(FUNC_NAME, HighwayDispatchTable)
199 
200 // HWY_EXPORT(FUNC_NAME); expands to a static array that is used by
201 // HWY_DYNAMIC_DISPATCH() to call the appropriate function at runtime. This
202 // static array must be defined at the same namespace level as the function
203 // it is exporting.
204 // After being exported, it can be called from other parts of the same source
205 // file using HWY_DYNAMIC_DISPATCH(), in particular from a function wrapper
206 // like in the following example:
207 //
208 // #include "hwy/highway.h"
209 // HWY_BEFORE_NAMESPACE();
210 // namespace skeleton {
211 // namespace HWY_NAMESPACE {
212 //
213 // void MyFunction(int a, char b, const char* c) { ... }
214 //
215 // // NOLINTNEXTLINE(google-readability-namespace-comments)
216 // } // namespace HWY_NAMESPACE
217 // } // namespace skeleton
218 // HWY_AFTER_NAMESPACE();
219 //
220 // namespace skeleton {
221 // HWY_EXPORT(MyFunction); // Defines the dispatch table in this scope.
222 //
223 // void MyFunction(int a, char b, const char* c) {
224 // return HWY_DYNAMIC_DISPATCH(MyFunction)(a, b, c);
225 // }
226 // } // namespace skeleton
227 //
228 
229 #if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
230 
231 // Simplified version for IDE or the dynamic dispatch case with only one target.
232 // This case still uses a table, although of a single element, to provide the
233 // same compile error conditions as with the dynamic dispatch case when multiple
234 // targets are being compiled.
235 #define HWY_EXPORT(FUNC_NAME) \
236  HWY_MAYBE_UNUSED static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) \
237  const HWY_DISPATCH_TABLE(FUNC_NAME)[1] = { \
238  &HWY_STATIC_DISPATCH(FUNC_NAME)}
239 #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME)
240 
241 #else
242 
243 // Dynamic dispatch case with one entry per dynamic target plus the scalar
244 // mode and the initialization wrapper.
245 #define HWY_EXPORT(FUNC_NAME) \
246  static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) \
247  const HWY_DISPATCH_TABLE(FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = { \
248  /* The first entry in the table initializes the global cache and \
249  * calls the appropriate function. */ \
250  &decltype(hwy::FunctionCacheFactory(&HWY_STATIC_DISPATCH( \
251  FUNC_NAME)))::ChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>, \
252  HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
253  HWY_CHOOSE_SCALAR(FUNC_NAME), \
254  }
255 #define HWY_DYNAMIC_DISPATCH(FUNC_NAME) \
256  (*(HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::chosen_target.GetIndex()]))
257 
258 #endif // HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
259 
260 } // namespace hwy
261 
262 #endif // HWY_HIGHWAY_INCLUDED
263 
264 //------------------------------------------------------------------------------
265 
266 // NOTE: the following definitions and ops/*.h depend on HWY_TARGET, so we want
267 // to include them once per target, which is ensured by the toggle check.
268 // Because ops/*.h are included under it, they do not need their own guard.
269 #if defined(HWY_HIGHWAY_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
270 #ifdef HWY_HIGHWAY_PER_TARGET
271 #undef HWY_HIGHWAY_PER_TARGET
272 #else
273 #define HWY_HIGHWAY_PER_TARGET
274 #endif
275 
276 #undef HWY_FULL2
277 #if HWY_TARGET == HWY_RVV
278 #define HWY_FULL2(T, LMUL) hwy::HWY_NAMESPACE::Simd<T, HWY_LANES(T) * (LMUL)>
279 #else
280 #define HWY_FULL2(T, LMUL) hwy::HWY_NAMESPACE::Simd<T, HWY_LANES(T)>
281 #endif
282 
283 // These define ops inside namespace hwy::HWY_NAMESPACE.
284 #if HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4
285 #include "hwy/ops/x86_128-inl.h"
286 #elif HWY_TARGET == HWY_AVX2
287 #include "hwy/ops/x86_256-inl.h"
288 #elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL
289 #include "hwy/ops/x86_512-inl.h"
290 #elif HWY_TARGET == HWY_PPC8
291 #error "PPC is not yet supported"
292 #elif HWY_TARGET == HWY_NEON
293 #include "hwy/ops/arm_neon-inl.h"
294 #elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2
295 #include "hwy/ops/arm_sve-inl.h"
296 #elif HWY_TARGET == HWY_WASM
297 #include "hwy/ops/wasm_128-inl.h"
298 #elif HWY_TARGET == HWY_RVV
299 #include "hwy/ops/rvv-inl.h"
300 #elif HWY_TARGET == HWY_SCALAR
301 #include "hwy/ops/scalar-inl.h"
302 #else
303 #pragma message("HWY_TARGET does not match any known target")
304 #endif // HWY_TARGET
305 
306 #include "hwy/ops/generic_ops-inl.h"
307 
308 #endif // HWY_HIGHWAY_PER_TARGET
Definition: aligned_allocator.h:23
FunctionCache< RetType, Args... > FunctionCacheFactory(RetType(*)(Args...))
Definition: highway.h:116
ChosenTarget chosen_target
size_t HWY_INLINE GetIndex() const
Definition: targets.h:242
Definition: highway.h:94
RetType() FunctionType(Args...)
Definition: highway.h:96
static RetType ChooseAndCall(Args... args)
Definition: highway.h:106