17#if defined(HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE) == \
18 defined(HWY_TARGET_TOGGLE)
19#ifdef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
20#undef HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
22#define HIGHWAY_HWY_CONTRIB_SORT_TRAITS128_TOGGLE
36#if VQSORT_ENABLED || HWY_IDE
42 constexpr bool Is128()
const {
return true; }
43 constexpr size_t LanesPerKey()
const {
return 2; }
50 const FixedTag<LaneType, 2>
d;
51 const auto temp =
LoadU(
d, a);
56 template <
class V,
class M>
57 HWY_INLINE V CompressKeys(V keys, M mask)
const {
62 HWY_INLINE Vec<D> SetKey(D
d,
const TFromD<D>* key)
const {
72 HWY_INLINE Vec<D> ReverseKeys2(D ,
const Vec<D>
v)
const {
78 HWY_INLINE Vec<D> ReverseKeys4(D
d,
const Vec<D>
v)
const {
80 return ReverseKeys(
d,
v);
85 HWY_INLINE Vec<D> OddEvenPairs(D
d,
const Vec<D> odd,
86 const Vec<D> even)
const {
92 HWY_INLINE V OddEvenKeys(
const V odd,
const V even)
const {
97 HWY_INLINE Vec<D> ReverseKeys8(D, Vec<D>)
const {
102 HWY_INLINE Vec<D> ReverseKeys16(D, Vec<D>)
const {
108 HWY_INLINE Vec<D> SwapAdjacentPairs(D, Vec<D>)
const {
114 HWY_INLINE Vec<D> SwapAdjacentQuads(D, Vec<D>)
const {
120 HWY_INLINE Vec<D> OddEvenQuads(D, Vec<D>, Vec<D>)
const {
126struct Key128 :
public KeyAny128 {
130 std::string KeyString()
const {
return "U128"; }
140struct OrderAscending128 :
public Key128 {
141 using Order = SortAscending;
144 return (a[1] == b[1]) ? a[0] < b[0] : a[1] < b[1];
148 HWY_INLINE Mask<D> Compare(D
d, Vec<D> a, Vec<D> b)
const {
154 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b)
const {
159 HWY_INLINE Vec<D> First(D
d,
const Vec<D> a,
const Vec<D> b)
const {
164 HWY_INLINE Vec<D> Last(D
d,
const Vec<D> a,
const Vec<D> b)
const {
180struct OrderDescending128 :
public Key128 {
181 using Order = SortDescending;
184 return (a[1] == b[1]) ? b[0] < a[0] : b[1] < a[1];
188 HWY_INLINE Mask<D> Compare(D
d, Vec<D> a, Vec<D> b)
const {
194 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b)
const {
199 HWY_INLINE Vec<D> First(D
d,
const Vec<D> a,
const Vec<D> b)
const {
204 HWY_INLINE Vec<D> Last(D
d,
const Vec<D> a,
const Vec<D> b)
const {
221struct KeyValue128 :
public KeyAny128 {
223 using KeyType = K64V64;
225 std::string KeyString()
const {
return "KV128"; }
228struct OrderAscendingKV128 :
public KeyValue128 {
229 using Order = SortAscending;
236 HWY_INLINE Mask<D> Compare(D
d, Vec<D> a, Vec<D> b)
const {
242 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b)
const {
247 HWY_INLINE Vec<D> First(D
d,
const Vec<D> a,
const Vec<D> b)
const {
252 HWY_INLINE Vec<D> Last(D
d,
const Vec<D> a,
const Vec<D> b)
const {
268struct OrderDescendingKV128 :
public KeyValue128 {
269 using Order = SortDescending;
276 HWY_INLINE Mask<D> Compare(D
d, Vec<D> a, Vec<D> b)
const {
282 HWY_INLINE Mask<DFromV<V> > CompareLanes(V a, V b)
const {
287 HWY_INLINE Vec<D> First(D
d,
const Vec<D> a,
const Vec<D> b)
const {
292 HWY_INLINE Vec<D> Last(D
d,
const Vec<D> a,
const Vec<D> b)
const {
310class Traits128 :
public Base {
312#if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_SVE_256
317 const Base* base =
static_cast<const Base*
>(
this);
318 const Mask<D> eqHL =
Eq(a, b);
319 const Vec<D> ltHL =
VecFromMask(
d, base->CompareLanes(a, b));
320#if HWY_TARGET == HWY_SVE_256
323 const Vec<D> ltLX = ShiftLeftLanes<1>(ltHL);
333#if HWY_TARGET == HWY_SVE_256
334 return svdup_lane_u64(
v, 3);
335#elif HWY_TARGET <= HWY_AVX3
336 return V{_mm512_permutex_epi64(
v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
338 return V{_mm256_permute4x64_epi64(
v.raw, _MM_SHUFFLE(3, 3, 3, 3))};
347 const Base* base =
static_cast<const Base*
>(
this);
350 v = base->SetKey(
d, buf + 0);
351 for (
size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
352 v = base->First(
d,
v, base->SetKey(
d, buf + i));
360 const Base* base =
static_cast<const Base*
>(
this);
363 v = base->SetKey(
d, buf + 0);
364 for (
size_t i = base->LanesPerKey(); i < N; i += base->LanesPerKey()) {
365 v = base->Last(
d,
v, base->SetKey(
d, buf + i));
371 HWY_INLINE void Sort2(D
d, Vec<D>& a, Vec<D>& b)
const {
372 const Base* base =
static_cast<const Base*
>(
this);
374 const Vec<D> a_copy = a;
375 const auto lt = base->Compare(
d, a, b);
382 HWY_INLINE Vec<D> SortPairsDistance1(D
d, Vec<D>
v)
const {
383 const Base* base =
static_cast<const Base*
>(
this);
384 Vec<D> swapped = base->ReverseKeys2(
d,
v);
386#if HWY_TARGET <= HWY_AVX2 || HWY_TARGET == HWY_SVE_256
387 const Vec<D> select = ReplicateTop4x(CompareTop(
d,
v, swapped));
390 Sort2(
d,
v, swapped);
391 return base->OddEvenKeys(swapped,
v);
397 HWY_INLINE Vec<D> SortPairsReverse4(D
d, Vec<D>
v)
const {
398 const Base* base =
static_cast<const Base*
>(
this);
399 Vec<D> swapped = base->ReverseKeys4(
d,
v);
402#if HWY_TARGET <= HWY_AVX3
403 const Vec512<uint64_t> outHx = CompareTop(
d,
v, swapped);
406 alignas(64) uint64_t kIndices[8] = {7, 7, 5, 5, 5, 5, 7, 7};
407 const Vec512<uint64_t> select =
411 Sort2(
d,
v, swapped);
412 return base->OddEvenPairs(
d, swapped,
v);
418 HWY_INLINE Vec<D> SortPairsDistance4(D, Vec<D>)
const {
#define HWY_RESTRICT
Definition: base.h:61
#define HWY_INLINE
Definition: base.h:62
#define HWY_DASSERT(condition)
Definition: base.h:191
#define HWY_MAYBE_UNUSED
Definition: base.h:73
#define HWY_ASSERT(condition)
Definition: base.h:145
HWY_INLINE Vec128< T, N > IfThenElse(hwy::SizeTag< 1 >, Mask128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition: x86_128-inl.h:673
d
Definition: rvv-inl.h:1742
HWY_API Vec128< T, N > OddEvenBlocks(Vec128< T, N >, Vec128< T, N > even)
Definition: arm_neon-inl.h:4533
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6309
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:6301
HWY_API Vec128< uint64_t > CompressBlocksNot(Vec128< uint64_t > v, Mask128< uint64_t >)
Definition: arm_neon-inl.h:5815
HWY_API Vec128< T, N > IfVecThenElse(Vec128< T, N > mask, Vec128< T, N > yes, Vec128< T, N > no)
Definition: arm_neon-inl.h:2006
HWY_API Vec128< T, N > VecFromMask(Simd< T, N, 0 > d, const Mask128< T, N > v)
Definition: arm_neon-inl.h:2182
HWY_API Vec128< T, N > DupEven(Vec128< T, N > v)
Definition: arm_neon-inl.h:4482
HWY_API constexpr size_t Lanes(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:236
HWY_API Vec128< T, N > TableLookupLanes(Vec128< T, N > v, Indices128< T, N > idx)
Definition: arm_neon-inl.h:3934
HWY_API void StoreU(const Vec128< uint8_t > v, Full128< uint8_t >, uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2725
HWY_INLINE VFromD< D > Min128Upper(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:6260
HWY_API Vec128< T, N > SwapAdjacentBlocks(Vec128< T, N > v)
Definition: arm_neon-inl.h:4540
HWY_INLINE VFromD< D > Min128(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:6250
svuint16_t Set(Simd< bfloat16_t, N, kPow2 > d, bfloat16_t arg)
Definition: arm_sve-inl.h:312
HWY_INLINE VFromD< D > Max128Upper(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:6265
HWY_INLINE Mask128< T, N > Lt128(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:6212
decltype(GetLane(V())) LaneType
Definition: generic_ops-inl.h:25
HWY_API Vec128< uint8_t > LoadU(Full128< uint8_t >, const uint8_t *HWY_RESTRICT unaligned)
Definition: arm_neon-inl.h:2544
HWY_API Vec128< T, N > OrAnd(Vec128< T, N > o, Vec128< T, N > a1, Vec128< T, N > a2)
Definition: arm_neon-inl.h:1999
HWY_API Vec128< T, N > ConcatUpperLower(Simd< T, N, 0 > d, Vec128< T, N > hi, Vec128< T, N > lo)
Definition: arm_neon-inl.h:4406
HWY_INLINE VFromD< D > Max128(D d, const VFromD< D > a, const VFromD< D > b)
Definition: arm_neon-inl.h:6255
HWY_API Indices128< T, N > SetTableIndices(Simd< T, N, 0 > d, const TI *idx)
Definition: arm_neon-inl.h:3928
HWY_API Vec128< T, N > LoadDup128(Simd< T, N, 0 > d, const T *const HWY_RESTRICT p)
Definition: arm_neon-inl.h:2718
N
Definition: rvv-inl.h:1742
HWY_API Vec128< T > ReverseBlocks(Full128< T >, const Vec128< T > v)
Definition: arm_neon-inl.h:4548
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2882
HWY_INLINE Mask128< T, N > Lt128Upper(Simd< T, N, 0 > d, Vec128< T, N > a, Vec128< T, N > b)
Definition: arm_neon-inl.h:6240
const vfloat64m1_t v
Definition: rvv-inl.h:1742
Definition: aligned_allocator.h:27
HWY_API constexpr T HighestValue()
Definition: base.h:576
HWY_API constexpr T LowestValue()
Definition: base.h:563
#define HWY_NAMESPACE
Definition: set_macros-inl.h:82