42 return Min(
Max(lo, v), hi);
47 #if HWY_TARGET != HWY_SCALAR && HWY_TARGET != HWY_RVV
49 template <
size_t kLanes,
class D,
class V = VFromD<D>>
51 constexpr
size_t kBytes = kLanes *
sizeof(LaneType<V>);
52 static_assert(kBytes < 16,
"Shift count is per-block");
53 return CombineShiftRightBytes<kBytes>(d, hi, lo);
57 template <
size_t kLanes,
class V>
59 return CombineShiftRightLanes<kLanes>(DFromV<V>(), hi, lo);
68 const Unsigned bit = Unsigned(1) << (
sizeof(Unsigned) * 8 - 1);
84 #if HWY_TARGET != HWY_SCALAR
100 const auto mask =
Set(du, 0xF);
104 alignas(16)
static constexpr uint8_t basisL[16] = {
105 0x00, 0x70, 0x2A, 0x5A, 0x98, 0xE8, 0xB2, 0xC2,
106 0x08, 0x78, 0x22, 0x52, 0x90, 0xE0, 0xBA, 0xCA};
107 alignas(16)
static constexpr uint8_t basisU[16] = {
108 0x00, 0x4D, 0x7C, 0x31, 0x7D, 0x30, 0x01, 0x4C,
109 0x81, 0xCC, 0xFD, 0xB0, 0xFC, 0xB1, 0x80, 0xCD};
110 const auto sL =
And(state, mask);
111 const auto sU = ShiftRight<4>(state);
114 state =
Xor(gf4L, gf4U);
119 alignas(16)
static constexpr uint8_t kZetaInv[16] = {
120 0x80, 7, 11, 15, 6, 10, 4, 1, 9, 8, 5, 2, 12, 14, 13, 3};
121 alignas(16)
static constexpr uint8_t kInv[16] = {
122 0x80, 1, 8, 13, 15, 6, 5, 14, 2, 12, 11, 10, 9, 3, 7, 4};
124 const auto sL =
And(state, mask);
125 const auto sU = ShiftRight<4>(state);
126 const auto sX =
Xor(sU, sL);
135 alignas(16)
static constexpr uint8_t kAffineL[16] = {
136 0x00, 0xC7, 0xBD, 0x6F, 0x17, 0x6D, 0xD2, 0xD0,
137 0x78, 0xA8, 0x02, 0xC5, 0x7A, 0xBF, 0xAA, 0x15};
138 alignas(16)
static constexpr uint8_t kAffineU[16] = {
139 0x00, 0x6A, 0xBB, 0x5F, 0xA5, 0x74, 0xE4, 0xCF,
140 0xFA, 0x35, 0x2B, 0x41, 0xD1, 0x90, 0x1E, 0x8E};
143 return Xor(
Xor(affL, affU),
Set(du, 0x63));
151 #if (defined(HWY_NATIVE_AES) == defined(HWY_TARGET_TOGGLE))
152 #ifdef HWY_NATIVE_AES
153 #undef HWY_NATIVE_AES
155 #define HWY_NATIVE_AES
159 #if HWY_TARGET != HWY_SCALAR
164 HWY_API V ShiftRows(
const V state) {
166 alignas(16)
static constexpr uint8_t kShiftRow[16] = {
171 const auto shift_row =
LoadDup128(du, kShiftRow);
176 HWY_API V MixColumns(
const V state) {
183 alignas(16)
static constexpr uint8_t k2301[16] = {
184 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13};
185 alignas(16)
static constexpr uint8_t k1230[16] = {
186 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12};
190 const auto d =
Xor(
Add(state, state), overflow);
192 const auto d_s2301 =
Xor(d, s2301);
193 const auto t_s2301 =
Xor(state, d_s2301);
195 return Xor(d_s2301, t1230_s3012);
204 state = detail::SubBytes(state);
205 state = detail::ShiftRows(state);
206 state = detail::MixColumns(state);
207 state =
Xor(state, round_key);
217 static_assert(
IsSame<
TFromD<decltype(d)>, uint64_t>(),
"V must be u64");
218 const auto k1 =
Set(d, 0x1111111111111111ULL);
219 const auto k2 =
Set(d, 0x2222222222222222ULL);
220 const auto k4 =
Set(d, 0x4444444444444444ULL);
221 const auto k8 =
Set(d, 0x8888888888888888ULL);
222 const auto a0 =
And(a, k1);
223 const auto a1 =
And(a, k2);
224 const auto a2 =
And(a, k4);
225 const auto a3 =
And(a, k8);
226 const auto b0 =
And(b, k1);
227 const auto b1 =
And(b, k2);
228 const auto b2 =
And(b, k4);
229 const auto b3 =
And(b, k8);
245 static_assert(
IsSame<
TFromD<decltype(d)>, uint64_t>(),
"V must be u64");
246 const auto k1 =
Set(d, 0x1111111111111111ULL);
247 const auto k2 =
Set(d, 0x2222222222222222ULL);
248 const auto k4 =
Set(d, 0x4444444444444444ULL);
249 const auto k8 =
Set(d, 0x8888888888888888ULL);
250 const auto a0 =
And(a, k1);
251 const auto a1 =
And(a, k2);
252 const auto a2 =
And(a, k4);
253 const auto a3 =
And(a, k8);
254 const auto b0 =
And(b, k1);
255 const auto b1 =
And(b, k2);
256 const auto b2 =
And(b, k4);
257 const auto b3 =
And(b, k8);
274 #if (defined(HWY_NATIVE_POPCNT) == defined(HWY_TARGET_TOGGLE))
275 #ifdef HWY_NATIVE_POPCNT
276 #undef HWY_NATIVE_POPCNT
278 #define HWY_NATIVE_POPCNT
281 template <
typename V, HWY_IF_LANES_ARE(u
int8_t, V)>
284 HWY_ALIGN constexpr uint8_t kLookup[16] = {
285 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
287 auto lo =
And(v,
Set(d, 0xF));
288 auto hi = ShiftRight<4>(v);
293 template <
typename V, HWY_IF_LANES_ARE(u
int16_t, V)>
298 return Add(ShiftRight<8>(vals),
And(vals,
Set(d, 0xFF)));
301 template <
typename V, HWY_IF_LANES_ARE(u
int32_t, V)>
306 return Add(ShiftRight<16>(vals),
And(vals,
Set(d, 0xFF)));
309 #if HWY_CAP_INTEGER64
310 template <
typename V, HWY_IF_LANES_ARE(u
int64_t, V)>
315 return Add(ShiftRight<32>(vals),
And(vals,
Set(d, 0xFF)));
#define HWY_MAX(a, b)
Definition: base.h:123
#define HWY_API
Definition: base.h:117
#define HWY_INLINE
Definition: base.h:59
HWY_INLINE Mask128< T, N > Xor(hwy::SizeTag< 1 >, const Mask128< T, N > a, const Mask128< T, N > b)
Definition: x86_128-inl.h:879
HWY_INLINE Vec128< T, N > IfThenElseZero(hwy::SizeTag< 1 >, Mask128< T, N > mask, Vec128< T, N > yes)
Definition: x86_128-inl.h:672
HWY_INLINE Mask128< T, N > And(hwy::SizeTag< 1 >, const Mask128< T, N > a, const Mask128< T, N > b)
Definition: x86_128-inl.h:768
HWY_API Vec< D > SignBit(D d)
Definition: generic_ops-inl.h:66
svuint16_t Set(Simd< bfloat16_t, N > d, bfloat16_t arg)
Definition: arm_sve-inl.h:299
HWY_API uint8_t GetLane(const Vec128< uint8_t, 16 > v)
Definition: arm_neon-inl.h:744
HWY_API Vec128< T, N > PopulationCount(Vec128< T, N > v)
Definition: arm_neon-inl.h:1520
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5035
HWY_API Vec< D > NaN(D d)
Definition: generic_ops-inl.h:74
HWY_API Vec128< T, N > LoadDup128(Simd< T, N > d, const T *const HWY_RESTRICT p)
Definition: arm_neon-inl.h:2164
HWY_API Vec128< uint64_t, N > Min(const Vec128< uint64_t, N > a, const Vec128< uint64_t, N > b)
Definition: arm_neon-inl.h:1879
HWY_API Vec256< uint64_t > CLMulLower(Vec256< uint64_t > a, Vec256< uint64_t > b)
Definition: x86_256-inl.h:3495
HWY_API Vec128< uint64_t, N > Max(const Vec128< uint64_t, N > a, const Vec128< uint64_t, N > b)
Definition: arm_neon-inl.h:1917
HWY_API Mask128< T, N > MaskFromVec(const Vec128< T, N > v)
Definition: arm_neon-inl.h:1600
HWY_INLINE Vec128< uint64_t > MulOdd(Vec128< uint64_t > a, Vec128< uint64_t > b)
Definition: arm_neon-inl.h:3947
HWY_API Vec256< uint8_t > AESRound(Vec256< uint8_t > state, Vec256< uint8_t > round_key)
Definition: x86_256-inl.h:3483
HWY_API Vec128< int64_t > MulEven(Vec128< int32_t > a, Vec128< int32_t > b)
Definition: arm_neon-inl.h:3907
Rebind< MakeSigned< TFromD< D > >, D > RebindToSigned
Definition: shared-inl.h:147
HWY_API V Add(V a, V b)
Definition: arm_neon-inl.h:5000
HWY_API Vec256< uint64_t > CLMulUpper(Vec256< uint64_t > a, Vec256< uint64_t > b)
Definition: x86_256-inl.h:3506
decltype(GetLane(V())) LaneType
Definition: generic_ops-inl.h:24
HWY_API Vec128< T, N > And(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1384
HWY_API Vec128< T, N > BitCast(Simd< T, N > d, Vec128< FromT, N *sizeof(T)/sizeof(FromT)> v)
Definition: arm_neon-inl.h:687
typename D::template Rebind< T > Rebind
Definition: shared-inl.h:144
HWY_API Vec128< T, N > Xor(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1430
HWY_API V Clamp(const V v, const V lo, const V hi)
Definition: generic_ops-inl.h:41
decltype(detail::DeduceD()(V())) DFromV
Definition: arm_neon-inl.h:532
HWY_INLINE constexpr HWY_MAYBE_UNUSED size_t MaxLanes(Simd< T, N >)
Definition: shared-inl.h:194
typename D::template Repartition< T > Repartition
Definition: shared-inl.h:155
decltype(MaskFromVec(Zero(D()))) Mask
Definition: generic_ops-inl.h:37
HWY_API Vec128< TI > TableLookupBytes(const Vec128< T > bytes, const Vec128< TI > from)
Definition: arm_neon-inl.h:3957
HWY_API Vec128< T, N > Zero(Simd< T, N > d)
Definition: arm_neon-inl.h:710
HWY_API V CombineShiftRightLanes(const D d, const V hi, V lo)
Definition: rvv-inl.h:1562
typename D::T TFromD
Definition: shared-inl.h:140
HWY_API VI TableLookupBytesOr0(const V bytes, const VI from)
Definition: arm_neon-inl.h:4012
HWY_API Vec128< T, N > Or(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1419
decltype(Zero(D())) Vec
Definition: generic_ops-inl.h:31
Definition: aligned_allocator.h:23
constexpr HWY_API bool IsSame()
Definition: base.h:260
typename detail::Relations< T >::Unsigned MakeUnsigned
Definition: base.h:521
constexpr T LimitsMax()
Definition: base.h:329
#define HWY_ALIGN
Definition: set_macros-inl.h:78
#define HWY_NAMESPACE
Definition: set_macros-inl.h:77
Definition: shared-inl.h:35