16 #ifndef HIGHWAY_HWY_BASE_H_
17 #define HIGHWAY_HWY_BASE_H_
// Two-step stringification. HWY_STR first lets its argument undergo macro
// expansion, then hands the result to HWY_STR_IMPL, which applies the `#`
// operator to produce a string literal. Calling HWY_STR_IMPL directly
// stringifies the argument exactly as written, without expanding it.
#define HWY_STR_IMPL(tokens) #tokens
#define HWY_STR(tokens) HWY_STR_IMPL(tokens)
// Keyword/attribute spellings using __forceinline, __declspec and __pragma
// (presumably the MSVC branch; the enclosing #if is not visible here).
#define HWY_RESTRICT __restrict
#define HWY_INLINE __forceinline
#define HWY_NOINLINE __declspec(noinline)
#define HWY_NORETURN __declspec(noreturn)

// No branch-prediction hint in this branch: both yield the parenthesized
// condition unchanged.
#define HWY_LIKELY(condition) (condition)
#define HWY_UNLIKELY(condition) (condition)

// Pragma helpers. HWY_DIAGNOSTICS wraps its argument in `warning(...)`.
// HWY_DIAGNOSTICS_OFF accepts one argument per compiler family and forwards
// only the first one here.
#define HWY_PRAGMA(directive) __pragma(directive)
#define HWY_DIAGNOSTICS(directive) HWY_PRAGMA(warning(directive))
#define HWY_DIAGNOSTICS_OFF(msc_arg, gcc_arg) HWY_DIAGNOSTICS(msc_arg)

// Expands to nothing in this branch.
#define HWY_MAYBE_UNUSED
#define HWY_HAS_ASSUME_ALIGNED 0
55 #if (_MSC_VER >= 1700)
56 #define HWY_MUST_USE_RESULT _Check_return_
58 #define HWY_MUST_USE_RESULT
// Keyword/attribute spellings using GNU-style __attribute__ and _Pragma
// (presumably the non-MSVC branch; the enclosing #else is not visible here).
#define HWY_RESTRICT __restrict__
#define HWY_INLINE inline __attribute__((always_inline))
#define HWY_NOINLINE __attribute__((noinline))
#define HWY_FLATTEN __attribute__((flatten))
#define HWY_NORETURN __attribute__((noreturn))

// Branch hints: the condition is normalized to 0/1 via `!!` before being
// passed to __builtin_expect together with the expected value.
#define HWY_LIKELY(condition) __builtin_expect(!!(condition), 1)
#define HWY_UNLIKELY(condition) __builtin_expect(!!(condition), 0)

// Pragma helpers. HWY_PRAGMA stringifies its argument for _Pragma.
// HWY_DIAGNOSTICS prefixes the argument with `GCC diagnostic`.
// HWY_DIAGNOSTICS_OFF accepts one argument per compiler family and forwards
// only the second one here.
#define HWY_PRAGMA(directive) _Pragma(#directive)
#define HWY_DIAGNOSTICS(directive) HWY_PRAGMA(GCC diagnostic directive)
#define HWY_DIAGNOSTICS_OFF(msc_arg, gcc_arg) HWY_DIAGNOSTICS(gcc_arg)

#define HWY_MAYBE_UNUSED __attribute__((unused))
#define HWY_MUST_USE_RESULT __attribute__((warn_unused_result))
84 #if HWY_HAS_ATTRIBUTE(__format__)
85 #define HWY_FORMAT(idx_fmt, idx_arg) \
86 __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
88 #define HWY_FORMAT(idx_fmt, idx_arg)
96 #if HWY_HAS_BUILTIN(__builtin_assume_aligned)
97 #define HWY_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
99 #define HWY_ASSUME_ALIGNED(ptr, align) (ptr)
105 #if HWY_COMPILER_CLANG
106 #define HWY_PUSH_ATTRIBUTES(targets_str) \
107 HWY_PRAGMA(clang attribute push(__attribute__((target(targets_str))), \
108 apply_to = function))
109 #define HWY_POP_ATTRIBUTES HWY_PRAGMA(clang attribute pop)
110 #elif HWY_COMPILER_GCC
111 #define HWY_PUSH_ATTRIBUTES(targets_str) \
112 HWY_PRAGMA(GCC push_options) HWY_PRAGMA(GCC target targets_str)
113 #define HWY_POP_ATTRIBUTES HWY_PRAGMA(GCC pop_options)
115 #define HWY_PUSH_ATTRIBUTES(targets_str)
116 #define HWY_POP_ATTRIBUTES
// Declaration prefix that expands to `static` followed by HWY_INLINE,
// HWY_FLATTEN and HWY_MAYBE_UNUSED; the meaning of those three depends on
// which compiler-specific definitions are active.
122 #define HWY_API static HWY_INLINE HWY_FLATTEN HWY_MAYBE_UNUSED
// Two-step token pasting. HWY_CONCAT lets both arguments undergo macro
// expansion before HWY_CONCAT_IMPL joins them with `##`. Calling
// HWY_CONCAT_IMPL directly pastes the arguments exactly as written.
#define HWY_CONCAT_IMPL(lhs, rhs) lhs##rhs
#define HWY_CONCAT(lhs, rhs) HWY_CONCAT_IMPL(lhs, rhs)
// Smaller / larger of two values, usable in constant expressions.
// Caution: each argument appears twice in the expansion, so an argument with
// side effects (e.g. `i++`) may be evaluated more than once.
#define HWY_MIN(lhs, rhs) ((lhs) < (rhs) ? (lhs) : (rhs))
#define HWY_MAX(lhs, rhs) ((lhs) > (rhs) ? (lhs) : (rhs))
// Expands to a call to std::atomic_thread_fence with
// std::memory_order_acq_rel. The expansion carries no trailing semicolon, so
// the use site supplies it. Using this macro requires <atomic> to be in scope.
134 #define HWY_FENCE std::atomic_thread_fence(std::memory_order_acq_rel)
// Expands to four comma-separated copies of the argument, e.g. for use inside
// a braced initializer list.
#define HWY_REP4(item) item, item, item, item
// Calls ::hwy::Abort with the current __FILE__ and __LINE__, followed by the
// format string and any further arguments. `##__VA_ARGS__` drops the comma
// before it when no extra arguments are given (a GNU extension that MSVC also
// accepts), so HWY_ABORT("message") is valid.
#define HWY_ABORT(fmt, ...) \
  ::hwy::Abort(__FILE__, __LINE__, fmt, ##__VA_ARGS__)
147 #define HWY_ASSERT(condition) \
149 if (!(condition)) { \
150 HWY_ABORT("Assert %s", #condition); \
154 #if HWY_HAS_FEATURE(memory_sanitizer) || defined(MEMORY_SANITIZER)
155 #define HWY_IS_MSAN 1
157 #define HWY_IS_MSAN 0
160 #if HWY_HAS_FEATURE(address_sanitizer) || defined(ADDRESS_SANITIZER)
161 #define HWY_IS_ASAN 1
163 #define HWY_IS_ASAN 0
166 #if HWY_HAS_FEATURE(thread_sanitizer) || defined(THREAD_SANITIZER)
167 #define HWY_IS_TSAN 1
169 #define HWY_IS_TSAN 0
175 #define HWY_ATTR_NO_MSAN __attribute__((no_sanitize_memory))
177 #define HWY_ATTR_NO_MSAN
181 #if !defined(HWY_IS_DEBUG_BUILD)
184 #if (!defined(__OPTIMIZE__) && !defined(NDEBUG)) || HWY_IS_ASAN || \
185 HWY_IS_MSAN || HWY_IS_TSAN || defined(__clang_analyzer__)
186 #define HWY_IS_DEBUG_BUILD 1
188 #define HWY_IS_DEBUG_BUILD 0
192 #if HWY_IS_DEBUG_BUILD
193 #define HWY_DASSERT(condition) HWY_ASSERT(condition)
195 #define HWY_DASSERT(condition) \
207 #elif HWY_ARCH_RVV && defined(__riscv_vector)
221 #define HWY_ALIGN_MAX alignas(64)
222 #elif HWY_ARCH_RVV && defined(__riscv_vector)
223 #define HWY_ALIGN_MAX alignas(8)
225 #define HWY_ALIGN_MAX alignas(16)
234 #if HWY_ARCH_ARM && (__ARM_FP & 2)
235 #define HWY_NATIVE_FLOAT16 1
237 #define HWY_NATIVE_FLOAT16 0
240 #pragma pack(push, 1)
242 #if HWY_NATIVE_FLOAT16
243 using float16_t = __fp16;
265 template <
bool Condition>
272 template <
bool Condition>
275 template <
typename T,
typename U>
280 template <
typename T>
285 template <
typename T,
typename U>
// SFINAE helpers intended for use as a trailing template parameter. Each
// expands to an unnamed `hwy::EnableIf<condition>*` parameter defaulted to
// nullptr, so the enclosing template presumably only participates in overload
// resolution when the condition holds (EnableIf is defined elsewhere in this
// header).
//
// All helper names are qualified with hwy:: so the macros also work when used
// outside namespace hwy. N is parenthesized so that expression arguments such
// as `N + 1` are multiplied as a whole. Each condition is wrapped in
// parentheses so that a `>` inside it cannot close the template argument list.

// Vector size constraints: N lanes of type T occupy N * sizeof(T) bytes.
#define HWY_IF_LE128(T, N) hwy::EnableIf<((N) * sizeof(T) <= 16)>* = nullptr
#define HWY_IF_LE64(T, N) hwy::EnableIf<((N) * sizeof(T) <= 8)>* = nullptr
#define HWY_IF_LE32(T, N) hwy::EnableIf<((N) * sizeof(T) <= 4)>* = nullptr
#define HWY_IF_GE32(T, N) hwy::EnableIf<((N) * sizeof(T) >= 4)>* = nullptr
#define HWY_IF_GE64(T, N) hwy::EnableIf<((N) * sizeof(T) >= 8)>* = nullptr
#define HWY_IF_GE128(T, N) hwy::EnableIf<((N) * sizeof(T) >= 16)>* = nullptr
#define HWY_IF_GT128(T, N) hwy::EnableIf<((N) * sizeof(T) > 16)>* = nullptr

// Lane type category constraints. HWY_IF_SIGNED additionally excludes types
// for which IsFloat is true, so it selects signed non-float types only.
#define HWY_IF_UNSIGNED(T) hwy::EnableIf<!hwy::IsSigned<T>()>* = nullptr
#define HWY_IF_SIGNED(T) \
  hwy::EnableIf<hwy::IsSigned<T>() && !hwy::IsFloat<T>()>* = nullptr
#define HWY_IF_FLOAT(T) hwy::EnableIf<hwy::IsFloat<T>()>* = nullptr
#define HWY_IF_NOT_FLOAT(T) hwy::EnableIf<!hwy::IsFloat<T>()>* = nullptr

// Lane size constraints, in bytes.
#define HWY_IF_LANE_SIZE(T, bytes) \
  hwy::EnableIf<sizeof(T) == (bytes)>* = nullptr
#define HWY_IF_NOT_LANE_SIZE(T, bytes) \
  hwy::EnableIf<sizeof(T) != (bytes)>* = nullptr
336 template <
typename T>
451 template <
typename T>
453 template <
typename T>
455 template <
typename T>
459 template <
typename T>
461 template <
typename T>
475 template <
typename T>
479 return IsSame<T, float>() || IsSame<T, double>();
482 template <
typename T>
496 template <
typename T>
498 static_assert(!IsFloat<T>(),
"Only for integer types");
500 return static_cast<T
>(IsSigned<T>() ? (
static_cast<TU
>(~0ull) >> 1)
501 :
static_cast<TU
>(~0ull));
503 template <
typename T>
505 static_assert(!IsFloat<T>(),
"Only for integer types");
506 return IsSigned<T>() ? T(-1) - LimitsMax<T>() : T(0);
511 template <
typename T>
513 return LimitsMin<T>();
524 template <
typename T>
526 return LimitsMax<T>();
538 template <
typename T>
540 static_assert(
sizeof(T) == 0,
"Only instantiate the specializations");
549 return 0x7FF0000000000000ULL;
553 template <
typename T>
555 static_assert(
sizeof(T) == 0,
"Only instantiate the specializations");
564 return 0x000FFFFFFFFFFFFFULL;
569 template <
typename T>
571 static_assert(
sizeof(T) == 0,
"Only instantiate the specializations");
581 return 4503599627370496.0;
587 template <
typename T1,
typename T2>
589 return (a + b - 1) / b;
593 constexpr
inline size_t RoundUpTo(
size_t what,
size_t align) {
594 return DivCeil(what, align) * align;
599 #if HWY_COMPILER_MSVC
601 _BitScanForward(&index, x);
604 return static_cast<size_t>(__builtin_ctz(x));
609 #if HWY_COMPILER_MSVC
612 _BitScanForward64(&index, x);
616 uint32_t lsb =
static_cast<uint32_t
>(x & 0xFFFFFFFF);
619 uint32_t msb =
static_cast<uint32_t
>(x >> 32u);
620 _BitScanForward(&index, msb);
623 _BitScanForward(&index, lsb);
628 return static_cast<size_t>(__builtin_ctzll(x));
634 #if HWY_COMPILER_MSVC
636 _BitScanReverse(&index, x);
639 return static_cast<size_t>(__builtin_clz(x));
644 #if HWY_COMPILER_MSVC
647 _BitScanReverse64(&index, x);
651 const uint32_t msb =
static_cast<uint32_t
>(x >> 32u);
654 const uint32_t lsb =
static_cast<uint32_t
>(x & 0xFFFFFFFF);
655 _BitScanReverse(&index, lsb);
658 _BitScanReverse(&index, msb);
663 return static_cast<size_t>(__builtin_clzll(x));
668 #if HWY_COMPILER_CLANG || HWY_COMPILER_GCC
669 return static_cast<size_t>(__builtin_popcountll(x));
674 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64 && defined(__AVX__)
675 return _mm_popcnt_u64(x);
676 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_32 && defined(__AVX__)
677 return _mm_popcnt_u32(uint32_t(x)) + _mm_popcnt_u32(uint32_t(x >> 32));
679 x -= ((x >> 1) & 0x5555555555555555ULL);
680 x = (((x >> 2) & 0x3333333333333333ULL) + (x & 0x3333333333333333ULL));
681 x = (((x >> 4) + x) & 0x0F0F0F0F0F0F0F0FULL);
685 return static_cast<size_t>(x & 0x7Fu);
692 template <
typename TI>
696 :
static_cast<size_t>(
FloorLog2(
static_cast<TI
>(x >> 1)) + 1);
699 template <
typename TI>
703 :
static_cast<size_t>(
FloorLog2(
static_cast<TI
>(x - 1)) + 1);
706 #if HWY_COMPILER_MSVC && HWY_ARCH_X86_64
707 #pragma intrinsic(_umul128)
712 #if defined(__SIZEOF_INT128__)
713 __uint128_t product = (__uint128_t)a * (__uint128_t)b;
714 *upper = (uint64_t)(product >> 64);
715 return (uint64_t)(product & 0xFFFFFFFFFFFFFFFFULL);
716 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
717 return _umul128(a, b, upper);
719 constexpr uint64_t kLo32 = 0xFFFFFFFFU;
720 const uint64_t lo_lo = (a & kLo32) * (b & kLo32);
721 const uint64_t hi_lo = (a >> 32) * (b & kLo32);
722 const uint64_t lo_hi = (a & kLo32) * (b >> 32);
723 const uint64_t hi_hi = (a >> 32) * (b >> 32);
724 const uint64_t t = (lo_lo >> 32) + (hi_lo & kLo32) + lo_hi;
725 *upper = (hi_lo >> 32) + (t >> 32) + hi_hi;
726 return (t << 32) | (lo_lo & kLo32);
731 template <
size_t kBytes,
typename From,
typename To>
733 #if HWY_COMPILER_MSVC
735 reinterpret_cast<const uint8_t*
>(from);
736 uint8_t*
HWY_RESTRICT to_bytes =
reinterpret_cast<uint8_t*
>(to);
737 for (
size_t i = 0; i < kBytes; ++i) {
738 to_bytes[i] = from_bytes[i];
742 __builtin_memcpy(to, from, kBytes);
747 uint32_t bits = bf.
bits;
750 CopyBytes<4>(&bits, &f);
756 CopyBytes<4>(&f, &bits);
758 bf.
bits =
static_cast<uint16_t
>(bits >> 16);
#define HWY_RESTRICT
Definition: base.h:63
#define HWY_NORETURN
Definition: base.h:67
#define HWY_API
Definition: base.h:122
#define HWY_MAYBE_UNUSED
Definition: base.h:75
#define HWY_DLLEXPORT
Definition: highway_export.h:18
Definition: aligned_allocator.h:27
HWY_API void CopyBytes(const From *from, To *to)
Definition: base.h:732
constexpr T MantissaEnd()
Definition: base.h:570
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero64(const uint64_t x)
Definition: base.h:608
HWY_API float F32FromBF16(bfloat16_t bf)
Definition: base.h:746
constexpr HWY_API T LimitsMin()
Definition: base.h:504
HWY_API uint64_t Mul128(uint64_t a, uint64_t b, uint64_t *HWY_RESTRICT upper)
Definition: base.h:711
HWY_API bfloat16_t BF16FromF32(float f)
Definition: base.h:754
typename detail::TypeFromSize< N >::Float FloatFromSize
Definition: base.h:470
typename RemoveConstT< T >::type RemoveConst
Definition: base.h:329
typename detail::TypeFromSize< N >::Unsigned UnsignedFromSize
Definition: base.h:466
constexpr float HighestValue< float >()
Definition: base.h:529
constexpr T ExponentMask()
Definition: base.h:539
constexpr HWY_API T LimitsMax()
Definition: base.h:497
typename detail::TypeFromSize< N >::Signed SignedFromSize
Definition: base.h:468
constexpr T1 DivCeil(T1 a, T2 b)
Definition: base.h:588
constexpr float MantissaEnd< float >()
Definition: base.h:575
double float64_t
Definition: base.h:260
constexpr bool IsSigned< bfloat16_t >()
Definition: base.h:491
constexpr size_t FloorLog2(TI x)
Definition: base.h:693
HWY_API size_t Num0BitsAboveMS1Bit_Nonzero32(const uint32_t x)
Definition: base.h:633
constexpr bool IsSigned< float16_t >()
Definition: base.h:487
constexpr double HighestValue< double >()
Definition: base.h:533
typename EnableIfT< Condition >::type EnableIf
Definition: base.h:273
constexpr HWY_API bool IsFloat()
Definition: base.h:476
float float32_t
Definition: base.h:259
HWY_API size_t PopCount(uint64_t x)
Definition: base.h:667
constexpr double MantissaEnd< double >()
Definition: base.h:579
constexpr uint64_t ExponentMask< uint64_t >()
Definition: base.h:548
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x)
Definition: base.h:598
constexpr float LowestValue< float >()
Definition: base.h:516
constexpr HWY_API bool IsSame()
Definition: base.h:286
constexpr size_t CeilLog2(TI x)
Definition: base.h:700
HWY_API size_t Num0BitsAboveMS1Bit_Nonzero64(const uint64_t x)
Definition: base.h:643
constexpr double LowestValue< double >()
Definition: base.h:520
constexpr uint32_t ExponentMask< uint32_t >()
Definition: base.h:544
constexpr HWY_API T LowestValue()
Definition: base.h:512
constexpr HWY_API T HighestValue()
Definition: base.h:525
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize
Definition: base.h:211
constexpr T MantissaMask()
Definition: base.h:554
constexpr HWY_API bool IsSigned()
Definition: base.h:483
typename detail::Relations< T >::Unsigned MakeUnsigned
Definition: base.h:452
constexpr uint32_t MantissaMask< uint32_t >()
Definition: base.h:559
typename detail::Relations< T >::Wide MakeWide
Definition: base.h:460
constexpr uint64_t MantissaMask< uint64_t >()
Definition: base.h:563
typename detail::Relations< T >::Float MakeFloat
Definition: base.h:456
typename detail::Relations< T >::Signed MakeSigned
Definition: base.h:454
HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4) Abort(const char *file
HWY_DLLEXPORT HWY_NORETURN void int const char * format
Definition: base.h:763
HWY_DLLEXPORT HWY_NORETURN void int line
Definition: base.h:763
constexpr size_t RoundUpTo(size_t what, size_t align)
Definition: base.h:593
typename detail::Relations< T >::Narrow MakeNarrow
Definition: base.h:462
void type
Definition: base.h:269
@ value
Definition: base.h:277
T type
Definition: base.h:325
T type
Definition: base.h:321
uint16_t bits
Definition: base.h:254
int16_t Signed
Definition: base.h:404
float Wide
Definition: base.h:405
uint16_t Unsigned
Definition: base.h:403
double Float
Definition: base.h:419
uint64_t Unsigned
Definition: base.h:417
int64_t Signed
Definition: base.h:418
float Narrow
Definition: base.h:420
int16_t Signed
Definition: base.h:397
float Wide
Definition: base.h:399
uint16_t Unsigned
Definition: base.h:396
uint32_t Unsigned
Definition: base.h:409
double Wide
Definition: base.h:412
float Float
Definition: base.h:411
int32_t Signed
Definition: base.h:410
uint16_t Unsigned
Definition: base.h:359
int16_t Signed
Definition: base.h:360
int32_t Wide
Definition: base.h:361
int8_t Narrow
Definition: base.h:362
uint32_t Unsigned
Definition: base.h:374
int64_t Wide
Definition: base.h:377
float Float
Definition: base.h:376
int16_t Narrow
Definition: base.h:378
int32_t Signed
Definition: base.h:375
int32_t Narrow
Definition: base.h:392
double Float
Definition: base.h:391
uint64_t Unsigned
Definition: base.h:389
int64_t Signed
Definition: base.h:390
int16_t Wide
Definition: base.h:348
int8_t Signed
Definition: base.h:347
uint8_t Unsigned
Definition: base.h:346
uint8_t Narrow
Definition: base.h:355
int16_t Signed
Definition: base.h:353
uint32_t Wide
Definition: base.h:354
uint16_t Unsigned
Definition: base.h:352
uint32_t Unsigned
Definition: base.h:366
uint64_t Wide
Definition: base.h:369
uint16_t Narrow
Definition: base.h:370
float Float
Definition: base.h:368
int32_t Signed
Definition: base.h:367
uint32_t Narrow
Definition: base.h:385
int64_t Signed
Definition: base.h:383
uint64_t Unsigned
Definition: base.h:382
double Float
Definition: base.h:384
int8_t Signed
Definition: base.h:341
uint8_t Unsigned
Definition: base.h:340
uint16_t Wide
Definition: base.h:342
int8_t Signed
Definition: base.h:428
uint8_t Unsigned
Definition: base.h:427
int16_t Signed
Definition: base.h:433
uint16_t Unsigned
Definition: base.h:432
int32_t Signed
Definition: base.h:438
uint32_t Unsigned
Definition: base.h:437
float Float
Definition: base.h:439
double Float
Definition: base.h:445
int64_t Signed
Definition: base.h:444
uint64_t Unsigned
Definition: base.h:443
uint16_t bits
Definition: base.h:249