Grok  9.5.0
base.h
Go to the documentation of this file.
1 // Copyright 2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef HIGHWAY_HWY_BASE_H_
16 #define HIGHWAY_HWY_BASE_H_
17 
18 // For SIMD module implementations and their callers, target-independent.
19 
20 #include <stddef.h>
21 #include <stdint.h>
22 
23 #include <atomic>
24 #include <cfloat>
25 
27 
28 //------------------------------------------------------------------------------
29 // Compiler-specific definitions
30 
31 #define HWY_STR_IMPL(macro) #macro
32 #define HWY_STR(macro) HWY_STR_IMPL(macro)
33 
34 #if HWY_COMPILER_MSVC
35 
36 #include <intrin.h>
37 
38 #define HWY_RESTRICT __restrict
39 #define HWY_INLINE __forceinline
40 #define HWY_NOINLINE __declspec(noinline)
41 #define HWY_FLATTEN
42 #define HWY_NORETURN __declspec(noreturn)
43 #define HWY_LIKELY(expr) (expr)
44 #define HWY_UNLIKELY(expr) (expr)
45 #define HWY_PRAGMA(tokens) __pragma(tokens)
46 #define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(warning(tokens))
47 #define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(msc)
48 #define HWY_MAYBE_UNUSED
49 #define HWY_HAS_ASSUME_ALIGNED 0
50 #if (_MSC_VER >= 1700)
51 #define HWY_MUST_USE_RESULT _Check_return_
52 #else
53 #define HWY_MUST_USE_RESULT
54 #endif
55 
56 #else
57 
58 #define HWY_RESTRICT __restrict__
59 #define HWY_INLINE inline __attribute__((always_inline))
60 #define HWY_NOINLINE __attribute__((noinline))
61 #define HWY_FLATTEN __attribute__((flatten))
62 #define HWY_NORETURN __attribute__((noreturn))
63 #define HWY_LIKELY(expr) __builtin_expect(!!(expr), 1)
64 #define HWY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
65 #define HWY_PRAGMA(tokens) _Pragma(#tokens)
66 #define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(GCC diagnostic tokens)
67 #define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(gcc)
68 // Encountered "attribute list cannot appear here" when using the C++17
69 // [[maybe_unused]], so only use the old style attribute for now.
70 #define HWY_MAYBE_UNUSED __attribute__((unused))
71 #define HWY_MUST_USE_RESULT __attribute__((warn_unused_result))
72 
73 #endif // !HWY_COMPILER_MSVC
74 
75 //------------------------------------------------------------------------------
76 // Builtin/attributes
77 
78 // Enables error-checking of format strings.
79 #if HWY_HAS_ATTRIBUTE(__format__)
80 #define HWY_FORMAT(idx_fmt, idx_arg) \
81  __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
82 #else
83 #define HWY_FORMAT(idx_fmt, idx_arg)
84 #endif
85 
86 // Returns a void* pointer which the compiler then assumes is N-byte aligned.
87 // Example: float* HWY_RESTRICT aligned = (float*)HWY_ASSUME_ALIGNED(in, 32);
88 //
89 // The assignment semantics are required by GCC/Clang. ICC provides an in-place
90 // __assume_aligned, whereas MSVC's __assume appears unsuitable.
91 #if HWY_HAS_BUILTIN(__builtin_assume_aligned)
92 #define HWY_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
93 #else
94 #define HWY_ASSUME_ALIGNED(ptr, align) (ptr) /* not supported */
95 #endif
96 
97 // Clang and GCC require attributes on each function into which SIMD intrinsics
98 // are inlined. Support both per-function annotation (HWY_ATTR) for lambdas and
99 // automatic annotation via pragmas.
100 #if HWY_COMPILER_CLANG
101 #define HWY_PUSH_ATTRIBUTES(targets_str) \
102  HWY_PRAGMA(clang attribute push(__attribute__((target(targets_str))), \
103  apply_to = function))
104 #define HWY_POP_ATTRIBUTES HWY_PRAGMA(clang attribute pop)
105 #elif HWY_COMPILER_GCC
106 #define HWY_PUSH_ATTRIBUTES(targets_str) \
107  HWY_PRAGMA(GCC push_options) HWY_PRAGMA(GCC target targets_str)
108 #define HWY_POP_ATTRIBUTES HWY_PRAGMA(GCC pop_options)
109 #else
110 #define HWY_PUSH_ATTRIBUTES(targets_str)
111 #define HWY_POP_ATTRIBUTES
112 #endif
113 
114 //------------------------------------------------------------------------------
115 // Macros
116 
117 #define HWY_API static HWY_INLINE HWY_FLATTEN HWY_MAYBE_UNUSED
118 
119 #define HWY_CONCAT_IMPL(a, b) a##b
120 #define HWY_CONCAT(a, b) HWY_CONCAT_IMPL(a, b)
121 
122 #define HWY_MIN(a, b) ((a) < (b) ? (a) : (b))
123 #define HWY_MAX(a, b) ((a) > (b) ? (a) : (b))
124 
125 // Compile-time fence to prevent undesirable code reordering. On Clang x86, the
126 // typical asm volatile("" : : : "memory") has no effect, whereas atomic fence
127 // does, without generating code.
128 #if HWY_ARCH_X86
129 #define HWY_FENCE std::atomic_thread_fence(std::memory_order_acq_rel)
130 #else
131 // TODO(janwas): investigate alternatives. On ARM, the above generates barriers.
132 #define HWY_FENCE
133 #endif
134 
135 // 4 instances of a given literal value, useful as input to LoadDup128.
136 #define HWY_REP4(literal) literal, literal, literal, literal
137 
138 #define HWY_ABORT(format, ...) \
139  ::hwy::Abort(__FILE__, __LINE__, format, ##__VA_ARGS__)
140 
141 // Always enabled.
142 #define HWY_ASSERT(condition) \
143  do { \
144  if (!(condition)) { \
145  HWY_ABORT("Assert %s", #condition); \
146  } \
147  } while (0)
148 
149 // For enabling HWY_DASSERT and shortening tests in slower debug builds
150 #if !defined(HWY_IS_DEBUG_BUILD)
151 // Clang does not define NDEBUG, but it and GCC define __OPTIMIZE__, and recent
152 // MSVC defines NDEBUG (if not, could instead check _DEBUG).
153 #if (!defined(__OPTIMIZE__) && !defined(NDEBUG)) || \
154  defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
155  defined(THREAD_SANITIZER) || defined(__clang_analyzer__)
156 #define HWY_IS_DEBUG_BUILD 1
157 #else
158 #define HWY_IS_DEBUG_BUILD 0
159 #endif
160 #endif // HWY_IS_DEBUG_BUILD
161 
162 #if HWY_IS_DEBUG_BUILD
163 #define HWY_DASSERT(condition) HWY_ASSERT(condition)
164 #else
165 #define HWY_DASSERT(condition) \
166  do { \
167  } while (0)
168 #endif
169 
170 #if defined(HWY_EMULATE_SVE)
171 class FarmFloat16;
172 #endif
173 
174 namespace hwy {
175 
176 //------------------------------------------------------------------------------
177 // kMaxVectorSize (undocumented, pending removal)
178 
179 #if HWY_ARCH_X86
180 static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 64; // AVX-512
181 #elif HWY_ARCH_RVV && defined(__riscv_vector)
182 // Not actually an upper bound on the size.
183 static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 4096;
184 #else
185 static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize = 16;
186 #endif
187 
188 //------------------------------------------------------------------------------
189 // Alignment
190 
191 // For stack-allocated partial arrays or LoadDup128.
192 #if HWY_ARCH_X86
193 #define HWY_ALIGN_MAX alignas(64)
194 #elif HWY_ARCH_RVV && defined(__riscv_vector)
195 #define HWY_ALIGN_MAX alignas(8) // only elements need be aligned
196 #else
197 #define HWY_ALIGN_MAX alignas(16)
198 #endif
199 
200 //------------------------------------------------------------------------------
201 // Lane types
202 
203 // Match [u]int##_t naming scheme so rvv-inl.h macros can obtain the type name
204 // by concatenating base type and bits.
205 
206 #if HWY_ARCH_ARM && (__ARM_FP & 2)
207 #define HWY_NATIVE_FLOAT16 1
208 #else
209 #define HWY_NATIVE_FLOAT16 0
210 #endif
211 
212 #pragma pack(push, 1)
213 
214 #if defined(HWY_EMULATE_SVE)
215 using float16_t = FarmFloat16;
216 #elif HWY_NATIVE_FLOAT16
217 using float16_t = __fp16;
218 // Clang does not allow __fp16 arguments, but scalar.h requires LaneType
219 // arguments, so use a wrapper.
220 // TODO(janwas): replace with _Float16 when that is supported?
221 #else
222 struct float16_t {
223  uint16_t bits;
224 };
225 #endif
226 
227 struct bfloat16_t {
228  uint16_t bits;
229 };
230 
231 #pragma pack(pop)
232 
233 using float32_t = float;
234 using float64_t = double;
235 
236 //------------------------------------------------------------------------------
237 // Controlling overload resolution (SFINAE)
238 
// SFINAE building block with the same shape as std::enable_if: the primary
// template has no members, so naming EnableIfT<false, T>::type is ill-formed
// and the enclosing overload is discarded.
template <bool Condition, class T>
struct EnableIfT {};

// Only the `true` case exposes the nested `type`, which is simply T.
template <class T>
struct EnableIfT<true, T> {
  typedef T type;
};
245 
246 template <bool Condition, class T = void>
248 
// Type-equality trait. The primary template handles two distinct types and
// reports value = 0.
template <class T, class U>
struct IsSameT {
  enum { value = 0 };
};

// Partial specialization chosen when both arguments are the same type:
// value = 1.
template <class T>
struct IsSameT<T, T> {
  enum { value = 1 };
};
258 
259 template <typename T, typename U>
260 HWY_API constexpr bool IsSame() {
261  return IsSameT<T, U>::value;
262 }
263 
264 // Insert into template/function arguments to enable this overload only for
265 // vectors of AT MOST this many bits.
266 //
267 // Note that enabling for exactly 128 bits is unnecessary because a function can
268 // simply be overloaded with Vec128<T> and Full128<T> descriptor. Enabling for
269 // other sizes (e.g. 64 bit) can be achieved with Simd<T, 8 / sizeof(T)>.
270 #define HWY_IF_LE128(T, N) hwy::EnableIf<N * sizeof(T) <= 16>* = nullptr
271 #define HWY_IF_LE64(T, N) hwy::EnableIf<N * sizeof(T) <= 8>* = nullptr
272 #define HWY_IF_LE32(T, N) hwy::EnableIf<N * sizeof(T) <= 4>* = nullptr
273 #define HWY_IF_GE64(T, N) hwy::EnableIf<N * sizeof(T) >= 8>* = nullptr
274 #define HWY_IF_GE128(T, N) hwy::EnableIf<N * sizeof(T) >= 16>* = nullptr
275 #define HWY_IF_GT128(T, N) hwy::EnableIf<(N * sizeof(T) > 16)>* = nullptr
276 
// Insert into template/function arguments to enable an overload only for lane
// types T of the given category. Every trait is qualified with hwy:: so that
// all four macros expand correctly regardless of the namespace they are used
// in (previously only HWY_IF_FLOAT / HWY_IF_NOT_FLOAT were qualified).
#define HWY_IF_UNSIGNED(T) hwy::EnableIf<!hwy::IsSigned<T>()>* = nullptr
#define HWY_IF_SIGNED(T) \
  hwy::EnableIf<hwy::IsSigned<T>() && !hwy::IsFloat<T>()>* = nullptr
#define HWY_IF_FLOAT(T) hwy::EnableIf<hwy::IsFloat<T>()>* = nullptr
#define HWY_IF_NOT_FLOAT(T) hwy::EnableIf<!hwy::IsFloat<T>()>* = nullptr
282 
283 #define HWY_IF_LANE_SIZE(T, bytes) \
284  hwy::EnableIf<sizeof(T) == (bytes)>* = nullptr
285 #define HWY_IF_NOT_LANE_SIZE(T, bytes) \
286  hwy::EnableIf<sizeof(T) != (bytes)>* = nullptr
287 
// Empty tag type that carries only the compile-time size N, so that overloads
// can be selected by size.
template <size_t N>
struct SizeTag {};
291 
// Strips a top-level const qualifier. The primary template passes any type
// through unchanged.
template <typename T>
struct RemoveConstT {
  typedef T type;
};

// `const T` maps to plain T.
template <typename T>
struct RemoveConstT<const T> {
  typedef T type;
};
300 
301 template <class T>
303 
304 //------------------------------------------------------------------------------
305 // Type traits
306 
307 template <typename T>
308 constexpr bool IsFloat() {
309  // Cannot use T(1.25) != T(1) for float16_t, which can only be converted to or
310  // from a float, not compared.
311  return IsSame<T, float>() || IsSame<T, double>();
312 }
313 
314 template <typename T>
315 constexpr bool IsSigned() {
316  return T(0) > T(-1);
317 }
318 template <>
319 constexpr bool IsSigned<float16_t>() {
320  return true;
321 }
322 template <>
323 constexpr bool IsSigned<bfloat16_t>() {
324  return true;
325 }
326 
327 // Largest/smallest representable integer values.
328 template <typename T>
329 constexpr T LimitsMax() {
330  static_assert(!IsFloat<T>(), "Only for integer types");
331  return IsSigned<T>() ? T((1ULL << (sizeof(T) * 8 - 1)) - 1)
332  : static_cast<T>(~0ull);
333 }
334 template <typename T>
335 constexpr T LimitsMin() {
336  static_assert(!IsFloat<T>(), "Only for integer types");
337  return IsSigned<T>() ? T(-1) - LimitsMax<T>() : T(0);
338 }
339 
340 // Largest/smallest representable value (integer or float). This naming avoids
341 // confusion with numeric_limits<float>::min() (the smallest positive value).
342 template <typename T>
343 constexpr T LowestValue() {
344  return LimitsMin<T>();
345 }
346 template <>
347 constexpr float LowestValue<float>() {
348  return -FLT_MAX;
349 }
350 template <>
351 constexpr double LowestValue<double>() {
352  return -DBL_MAX;
353 }
354 
355 template <typename T>
356 constexpr T HighestValue() {
357  return LimitsMax<T>();
358 }
359 template <>
360 constexpr float HighestValue<float>() {
361  return FLT_MAX;
362 }
363 template <>
364 constexpr double HighestValue<double>() {
365  return DBL_MAX;
366 }
367 
// Returns the bitmask of the exponent field in IEEE binary32/64, as an
// unsigned integer of the same width. The primary template must never be
// instantiated; its static_assert fires for any T.
template <typename T>
constexpr T ExponentMask() {
  static_assert(sizeof(T) == 0, "Only instantiate the specializations");
  return 0;
}
template <>
constexpr uint32_t ExponentMask<uint32_t>() {
  return 0x7F800000u;  // bits 23..30
}
template <>
constexpr uint64_t ExponentMask<uint64_t>() {
  return 0x7FF0000000000000ull;  // bits 52..62
}
382 
// Returns 1 << mantissa_bits as a floating-point number. All integers whose
// absolute value is less than this can be represented exactly. The primary
// template must never be instantiated; its static_assert fires for any T.
template <typename T>
constexpr T MantissaEnd() {
  static_assert(sizeof(T) == 0, "Only instantiate the specializations");
  return 0;
}
template <>
constexpr float MantissaEnd<float>() {
  return static_cast<float>(1 << 23);  // 8388608.0f
}
template <>
constexpr double MantissaEnd<double>() {
  // A hex floating-point literal with p52 would require C++17, hence the
  // integer shift.
  return static_cast<double>(1ULL << 52);  // 4503599627370496.0
}
399 
400 //------------------------------------------------------------------------------
401 // Type relations
402 
403 namespace detail {
404 
// Maps an integer lane type to its related lane types:
//   Unsigned / Signed / Float: same size, different category;
//   Wide / Narrow: same category, twice / half the size.
// A member is only declared where such a type exists (e.g. no Float for 8/16
// bit integers here, no Wide for 64 bit, no Narrow for 8 bit).
template <typename T>
struct Relations;

template <>
struct Relations<uint8_t> {
  typedef uint8_t Unsigned;
  typedef int8_t Signed;
  typedef uint16_t Wide;
};

template <>
struct Relations<int8_t> {
  typedef uint8_t Unsigned;
  typedef int8_t Signed;
  typedef int16_t Wide;
};

template <>
struct Relations<uint16_t> {
  typedef uint16_t Unsigned;
  typedef int16_t Signed;
  typedef uint32_t Wide;
  typedef uint8_t Narrow;
};

template <>
struct Relations<int16_t> {
  typedef uint16_t Unsigned;
  typedef int16_t Signed;
  typedef int32_t Wide;
  typedef int8_t Narrow;
};

template <>
struct Relations<uint32_t> {
  typedef uint32_t Unsigned;
  typedef int32_t Signed;
  typedef float Float;
  typedef uint64_t Wide;
  typedef uint16_t Narrow;
};

template <>
struct Relations<int32_t> {
  typedef uint32_t Unsigned;
  typedef int32_t Signed;
  typedef float Float;
  typedef int64_t Wide;
  typedef int16_t Narrow;
};

template <>
struct Relations<uint64_t> {
  typedef uint64_t Unsigned;
  typedef int64_t Signed;
  typedef double Float;
  typedef uint32_t Narrow;
};

template <>
struct Relations<int64_t> {
  typedef uint64_t Unsigned;
  typedef int64_t Signed;
  typedef double Float;
  typedef int32_t Narrow;
};
463 template <>
465  using Unsigned = uint16_t;
466  using Signed = int16_t;
467  using Float = float16_t;
468  using Wide = float;
469 };
470 template <>
472  using Unsigned = uint16_t;
473  using Signed = int16_t;
474  using Wide = float;
475 };
476 template <>
477 struct Relations<float> {
478  using Unsigned = uint32_t;
479  using Signed = int32_t;
480  using Float = float;
481  using Wide = double;
482  using Narrow = float16_t;
483 };
484 template <>
485 struct Relations<double> {
486  using Unsigned = uint64_t;
487  using Signed = int64_t;
488  using Float = double;
489  using Narrow = float;
490 };
491 
492 template <size_t N>
494 template <>
495 struct TypeFromSize<1> {
496  using Unsigned = uint8_t;
497  using Signed = int8_t;
498 };
499 template <>
500 struct TypeFromSize<2> {
501  using Unsigned = uint16_t;
502  using Signed = int16_t;
503 };
504 template <>
505 struct TypeFromSize<4> {
506  using Unsigned = uint32_t;
507  using Signed = int32_t;
508  using Float = float;
509 };
510 template <>
511 struct TypeFromSize<8> {
512  using Unsigned = uint64_t;
513  using Signed = int64_t;
514  using Float = double;
515 };
516 
517 } // namespace detail
518 
519 // Aliases for types of a different category, but the same size.
520 template <typename T>
522 template <typename T>
524 template <typename T>
526 
527 // Aliases for types of the same category, but different size.
528 template <typename T>
530 template <typename T>
532 
533 // Obtain type from its size [bytes].
534 template <size_t N>
536 template <size_t N>
538 template <size_t N>
540 
541 //------------------------------------------------------------------------------
542 // Helper functions
543 
// Returns a / b rounded up, for non-negative a and positive b. The arithmetic
// is done in the common type of T1 and T2; the result is converted to T1.
// The intermediate a + b - 1 is not checked for overflow.
template <typename T1, typename T2>
constexpr inline T1 DivCeil(T1 a, T2 b) {
  return ((a + b) - 1) / b;
}
548 
549 // Works for any `align`; if a power of two, compiler emits ADD+AND.
550 constexpr inline size_t RoundUpTo(size_t what, size_t align) {
551  return DivCeil(what, align) * align;
552 }
553 
554 // Undefined results for x == 0.
555 HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x) {
556 #if HWY_COMPILER_MSVC
557  unsigned long index; // NOLINT
558  _BitScanForward(&index, x);
559  return index;
560 #else // HWY_COMPILER_MSVC
561  return static_cast<size_t>(__builtin_ctz(x));
562 #endif // HWY_COMPILER_MSVC
563 }
564 
565 HWY_API size_t Num0BitsBelowLS1Bit_Nonzero64(const uint64_t x) {
566 #if HWY_COMPILER_MSVC
567 #if HWY_ARCH_X86_64
568  unsigned long index; // NOLINT
569  _BitScanForward64(&index, x);
570  return index;
571 #else // HWY_ARCH_X86_64
572  // _BitScanForward64 not available
573  uint32_t lsb = static_cast<uint32_t>(x & 0xFFFFFFFF);
574  unsigned long index;
575  if (lsb == 0) {
576  uint32_t msb = static_cast<uint32_t>(x >> 32u);
577  _BitScanForward(&index, msb);
578  return 32 + index;
579  } else {
580  _BitScanForward(&index, lsb);
581  return index;
582  }
583 #endif // HWY_ARCH_X86_64
584 #else // HWY_COMPILER_MSVC
585  return static_cast<size_t>(__builtin_ctzll(x));
586 #endif // HWY_COMPILER_MSVC
587 }
588 
589 HWY_API size_t PopCount(uint64_t x) {
590 #if HWY_COMPILER_CLANG || HWY_COMPILER_GCC
591  return static_cast<size_t>(__builtin_popcountll(x));
592 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
593  return _mm_popcnt_u64(x);
594 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_32
595  return _mm_popcnt_u32(uint32_t(x)) + _mm_popcnt_u32(uint32_t(x >> 32));
596 #else
597  x -= ((x >> 1) & 0x55555555U);
598  x = (((x >> 2) & 0x33333333U) + (x & 0x33333333U));
599  x = (((x >> 4) + x) & 0x0F0F0F0FU);
600  x += (x >> 8);
601  x += (x >> 16);
602  x += (x >> 32);
603  x = x & 0x0000007FU;
604  return (unsigned int)x;
605 #endif
606 }
607 
608 #if HWY_COMPILER_MSVC && HWY_ARCH_X86_64
609 #pragma intrinsic(_umul128)
610 #endif
611 
612 // 64 x 64 = 128 bit multiplication
613 HWY_API uint64_t Mul128(uint64_t a, uint64_t b, uint64_t* HWY_RESTRICT upper) {
614 #if defined(__SIZEOF_INT128__)
615  __uint128_t product = (__uint128_t)a * (__uint128_t)b;
616  *upper = (uint64_t)(product >> 64);
617  return (uint64_t)(product & 0xFFFFFFFFFFFFFFFFULL);
618 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
619  return _umul128(a, b, upper);
620 #else
621  constexpr uint64_t kLo32 = 0xFFFFFFFFU;
622  const uint64_t lo_lo = (a & kLo32) * (b & kLo32);
623  const uint64_t hi_lo = (a >> 32) * (b & kLo32);
624  const uint64_t lo_hi = (a & kLo32) * (b >> 32);
625  const uint64_t hi_hi = (a >> 32) * (b >> 32);
626  const uint64_t t = (lo_lo >> 32) + (hi_lo & kLo32) + lo_hi;
627  *upper = (hi_lo >> 32) + (t >> 32) + hi_hi;
628  return (t << 32) | (lo_lo & kLo32);
629 #endif
630 }
631 
632 // The source/destination must not overlap/alias.
633 template <size_t kBytes, typename From, typename To>
634 HWY_API void CopyBytes(const From* from, To* to) {
635 #if HWY_COMPILER_MSVC
636  const uint8_t* HWY_RESTRICT from_bytes =
637  reinterpret_cast<const uint8_t*>(from);
638  uint8_t* HWY_RESTRICT to_bytes = reinterpret_cast<uint8_t*>(to);
639  for (size_t i = 0; i < kBytes; ++i) {
640  to_bytes[i] = from_bytes[i];
641  }
642 #else
643  // Avoids horrible codegen on Clang (series of PINSRB)
644  __builtin_memcpy(to, from, kBytes);
645 #endif
646 }
647 
649  uint32_t bits = bf.bits;
650  bits <<= 16;
651  float f;
652  CopyBytes<4>(&bits, &f);
653  return f;
654 }
655 
657  uint32_t bits;
658  CopyBytes<4>(&f, &bits);
659  bfloat16_t bf;
660  bf.bits = static_cast<uint16_t>(bits >> 16);
661  return bf;
662 }
663 
665  Abort(const char* file, int line, const char* format, ...);
666 
667 } // namespace hwy
668 
669 #endif // HIGHWAY_HWY_BASE_H_
#define HWY_RESTRICT
Definition: base.h:58
#define HWY_NORETURN
Definition: base.h:62
#define HWY_API
Definition: base.h:117
#define HWY_MAYBE_UNUSED
Definition: base.h:70
Definition: aligned_allocator.h:23
HWY_API void CopyBytes(const From *from, To *to)
Definition: base.h:634
constexpr T MantissaEnd()
Definition: base.h:386
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero64(const uint64_t x)
Definition: base.h:565
HWY_API float F32FromBF16(bfloat16_t bf)
Definition: base.h:648
HWY_API uint64_t Mul128(uint64_t a, uint64_t b, uint64_t *HWY_RESTRICT upper)
Definition: base.h:613
HWY_API bfloat16_t BF16FromF32(float f)
Definition: base.h:656
typename detail::TypeFromSize< N >::Float FloatFromSize
Definition: base.h:539
typename RemoveConstT< T >::type RemoveConst
Definition: base.h:302
typename detail::TypeFromSize< N >::Unsigned UnsignedFromSize
Definition: base.h:535
constexpr float HighestValue< float >()
Definition: base.h:360
constexpr T ExponentMask()
Definition: base.h:370
typename detail::TypeFromSize< N >::Signed SignedFromSize
Definition: base.h:537
constexpr T1 DivCeil(T1 a, T2 b)
Definition: base.h:545
constexpr float MantissaEnd< float >()
Definition: base.h:391
double float64_t
Definition: base.h:234
constexpr bool IsSigned< bfloat16_t >()
Definition: base.h:323
constexpr bool IsSigned()
Definition: base.h:315
constexpr bool IsSigned< float16_t >()
Definition: base.h:319
constexpr double HighestValue< double >()
Definition: base.h:364
HWY_NORETURN void int const char * format
Definition: base.h:665
float float32_t
Definition: base.h:233
HWY_API size_t PopCount(uint64_t x)
Definition: base.h:589
constexpr double MantissaEnd< double >()
Definition: base.h:395
constexpr uint64_t ExponentMask< uint64_t >()
Definition: base.h:379
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x)
Definition: base.h:555
constexpr T LimitsMin()
Definition: base.h:335
constexpr float LowestValue< float >()
Definition: base.h:347
constexpr HWY_API bool IsSame()
Definition: base.h:260
constexpr double LowestValue< double >()
Definition: base.h:351
constexpr uint32_t ExponentMask< uint32_t >()
Definition: base.h:375
typename EnableIfT< Condition, T >::type EnableIf
Definition: base.h:247
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize
Definition: base.h:185
constexpr T LowestValue()
Definition: base.h:343
HWY_NORETURN void int line
Definition: base.h:665
typename detail::Relations< T >::Unsigned MakeUnsigned
Definition: base.h:521
HWY_NORETURN void HWY_FORMAT(3, 4) Abort(const char *file
typename detail::Relations< T >::Wide MakeWide
Definition: base.h:529
typename detail::Relations< T >::Float MakeFloat
Definition: base.h:525
typename detail::Relations< T >::Signed MakeSigned
Definition: base.h:523
constexpr bool IsFloat()
Definition: base.h:308
constexpr T LimitsMax()
Definition: base.h:329
constexpr T HighestValue()
Definition: base.h:356
constexpr size_t RoundUpTo(size_t what, size_t align)
Definition: base.h:550
typename detail::Relations< T >::Narrow MakeNarrow
Definition: base.h:531
T type
Definition: base.h:243
Definition: base.h:240
Definition: base.h:250
@ value
Definition: base.h:251
T type
Definition: base.h:298
Definition: base.h:293
T type
Definition: base.h:294
Definition: base.h:290
Definition: base.h:227
uint16_t bits
Definition: base.h:228
int16_t Signed
Definition: base.h:473
float Wide
Definition: base.h:474
uint16_t Unsigned
Definition: base.h:472
double Float
Definition: base.h:488
uint64_t Unsigned
Definition: base.h:486
int64_t Signed
Definition: base.h:487
float Narrow
Definition: base.h:489
int16_t Signed
Definition: base.h:466
float Wide
Definition: base.h:468
uint16_t Unsigned
Definition: base.h:465
uint32_t Unsigned
Definition: base.h:478
double Wide
Definition: base.h:481
float Float
Definition: base.h:480
int32_t Signed
Definition: base.h:479
uint16_t Unsigned
Definition: base.h:428
int16_t Signed
Definition: base.h:429
int32_t Wide
Definition: base.h:430
int8_t Narrow
Definition: base.h:431
uint32_t Unsigned
Definition: base.h:443
int64_t Wide
Definition: base.h:446
float Float
Definition: base.h:445
int16_t Narrow
Definition: base.h:447
int32_t Signed
Definition: base.h:444
int32_t Narrow
Definition: base.h:461
double Float
Definition: base.h:460
uint64_t Unsigned
Definition: base.h:458
int64_t Signed
Definition: base.h:459
int16_t Wide
Definition: base.h:417
int8_t Signed
Definition: base.h:416
uint8_t Unsigned
Definition: base.h:415
uint8_t Narrow
Definition: base.h:424
int16_t Signed
Definition: base.h:422
uint32_t Wide
Definition: base.h:423
uint16_t Unsigned
Definition: base.h:421
uint32_t Unsigned
Definition: base.h:435
uint64_t Wide
Definition: base.h:438
uint16_t Narrow
Definition: base.h:439
float Float
Definition: base.h:437
int32_t Signed
Definition: base.h:436
uint32_t Narrow
Definition: base.h:454
int64_t Signed
Definition: base.h:452
uint64_t Unsigned
Definition: base.h:451
double Float
Definition: base.h:453
int8_t Signed
Definition: base.h:410
uint8_t Unsigned
Definition: base.h:409
uint16_t Wide
Definition: base.h:411
Definition: base.h:406
int8_t Signed
Definition: base.h:497
uint8_t Unsigned
Definition: base.h:496
int16_t Signed
Definition: base.h:502
uint16_t Unsigned
Definition: base.h:501
int32_t Signed
Definition: base.h:507
uint32_t Unsigned
Definition: base.h:506
float Float
Definition: base.h:508
double Float
Definition: base.h:514
int64_t Signed
Definition: base.h:513
uint64_t Unsigned
Definition: base.h:512
Definition: base.h:493
Definition: base.h:222
uint16_t bits
Definition: base.h:223