Grok 10.0.1
result-inl.h
Go to the documentation of this file.
1// Copyright 2021 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
17
18// Normal include guard for non-SIMD parts
19#ifndef HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
20#define HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
21
22#include <time.h>
23
24#include <algorithm> // std::sort
25#include <string>
26
27#include "hwy/base.h"
28#include "hwy/nanobenchmark.h"
29
30namespace hwy {
31
// Wrapper around a single double-precision value `t`; SecondsSince() below
// subtracts the `t` of two instances.
// NOTE(review): this listing jumps from original line 32 to 34. The
// cross-reference index of this page lists a `Timestamp()` constructor defined
// at result-inl.h:33 that is not shown here; presumably it initializes `t`
// with the current time — confirm against the real header.
32struct Timestamp {
34 double t;
35};
36
37static inline double SecondsSince(const Timestamp& t0) {
38 const Timestamp t1;
39 return t1.t - t0.t;
40}
41
// Returns the mean of the sorted samples with indices in [num / 4, num / 2),
// i.e. the second-lowest quarter of `seconds`. This is a trimmed mean (we
// don't want to run an out-of-L3-cache sort often enough for the mode to be
// reliable).
//
// `seconds` is sorted in place (ascending) as a side effect.
//
// With exactly one sample the index window above is empty, so that sample is
// returned; with no samples the result is 0.0. Both cases previously computed
// 0.0 / 0 and returned NaN. Results for two or more samples are unchanged.
static inline double SummarizeMeasurements(std::vector<double>& seconds) {
  if (seconds.empty()) return 0.0;
  std::sort(seconds.begin(), seconds.end());

  const size_t num = seconds.size();
  const size_t begin = num / 4;
  // num / 2 equals num / 4 only when num == 1; widen the window to cover that
  // single sample so the divisor below is never zero.
  const size_t end = (num / 2 > begin) ? num / 2 : begin + 1;

  double sum = 0.0;
  for (size_t i = begin; i < end; ++i) {
    sum += seconds[i];
  }
  return sum / static_cast<double>(end - begin);
}
55
56} // namespace hwy
57#endif // HIGHWAY_HWY_CONTRIB_SORT_RESULT_INL_H_
58
59// Per-target
60#if defined(HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE) == \
61 defined(HWY_TARGET_TOGGLE)
62#ifdef HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
63#undef HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
64#else
65#define HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
66#endif
67
69namespace hwy {
70namespace HWY_NAMESPACE {
71
// Bundles the parameters of one run (algo, dist, num_keys, num_threads,
// sizeof_key, key_name) with its duration `sec`, and can print a one-line
// summary via Print().
// NOTE(review): this listing is missing several original lines (the embedded
// numbering skips 76, 79-80, 82-83, 90, 92 and 96-97), so the constructor's
// initializer list, the printf argument list and two member declarations are
// truncated below. The cross-reference index of this page lists `Algo algo`
// (line 96) and `Dist dist` (line 97) as members — restore from the real
// header before compiling.
72struct Result {
// Default constructor: relies on the in-class initializers below for the
// numeric members that have them.
73 Result() {}
// Stores the given run parameters in the members of the same name (the
// visible part of the initializer list shows algo, dist and sec).
74 Result(const Algo algo, Dist dist, size_t num_keys, size_t num_threads,
75 double sec, size_t sizeof_key, const std::string& key_name)
77 algo(algo),
78 dist(dist),
81 sec(sec),
84
// Prints one line to stdout: four string columns, the key count in
// scientific notation, throughput in MB/s and the thread count.
85 void Print() const {
// Total bytes = keys * threads * bytes per key, computed in double.
86 const double bytes = static_cast<double>(num_keys) *
87 static_cast<double>(num_threads) *
88 static_cast<double>(sizeof_key);
89 printf("%10s: %12s: %7s: %9s: %.2E %4.0f MB/s (%2zu threads)\n",
// bytes * 1E-6 / sec converts bytes and seconds into the MB/s column.
91 DistName(dist), static_cast<double>(num_keys), bytes * 1E-6 / sec,
93 }
94
// No in-class initializer: left uninitialized by the default constructor.
95 uint32_t target;
98 size_t num_keys = 0;
99 size_t num_threads = 0;
100 double sec = 0.0;
101 size_t sizeof_key = 0;
102 std::string key_name;
103};
104
105template <class Traits, typename LaneType>
106bool VerifySort(Traits st, const InputStats<LaneType>& input_stats,
107 const LaneType* out, size_t num_lanes, const char* caller) {
108 constexpr size_t N1 = st.LanesPerKey();
109 HWY_ASSERT(num_lanes >= N1);
110
111 InputStats<LaneType> output_stats;
112 // Ensure it matches the sort order
113 for (size_t i = 0; i < num_lanes - N1; i += N1) {
114 output_stats.Notify(out[i]);
115 if (N1 == 2) output_stats.Notify(out[i + 1]);
116 // Reverse order instead of checking !Compare1 so we accept equal keys.
117 if (st.Compare1(out + i + N1, out + i)) {
118 printf("%s: i=%d of %d lanes: N1=%d %5.0f %5.0f vs. %5.0f %5.0f\n\n",
119 caller, static_cast<int>(i), static_cast<int>(num_lanes),
120 static_cast<int>(N1), static_cast<double>(out[i + 1]),
121 static_cast<double>(out[i + 0]),
122 static_cast<double>(out[i + N1 + 1]),
123 static_cast<double>(out[i + N1]));
124 HWY_ABORT("%d-bit sort is incorrect\n",
125 static_cast<int>(sizeof(LaneType) * 8 * N1));
126 }
127 }
128 output_stats.Notify(out[num_lanes - N1]);
129 if (N1 == 2) output_stats.Notify(out[num_lanes - N1 + 1]);
130
131 return input_stats == output_stats;
132}
133
134// NOLINTNEXTLINE(google-readability-namespace-comments)
135} // namespace HWY_NAMESPACE
136} // namespace hwy
138
139#endif // HIGHWAY_HWY_CONTRIB_SORT_RESULT_TOGGLE
#define HWY_ABORT(format,...)
Definition: base.h:141
#define HWY_ASSERT(condition)
Definition: base.h:145
Definition: algo-inl.h:117
void Notify(T value)
Definition: algo-inl.h:119
#define HWY_TARGET
Definition: detect_targets.h:341
bool VerifySort(Traits st, const InputStats< LaneType > &input_stats, const LaneType *out, size_t num_lanes, const char *caller)
Definition: result-inl.h:106
decltype(GetLane(V())) LaneType
Definition: generic_ops-inl.h:25
HWY_DLLEXPORT double Now()
Definition: aligned_allocator.h:27
static const char * DistName(Dist dist)
Definition: algo-inl.h:104
static HWY_MAYBE_UNUSED const char * TargetName(uint32_t target)
Definition: targets.h:77
Dist
Definition: algo-inl.h:98
const char * AlgoName(Algo algo)
Definition: algo-inl.h:185
static double SummarizeMeasurements(std::vector< double > &seconds)
Definition: result-inl.h:44
Algo
Definition: algo-inl.h:161
static double SecondsSince(const Timestamp &t0)
Definition: result-inl.h:37
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()
#define HWY_NAMESPACE
Definition: set_macros-inl.h:82
Definition: result-inl.h:72
double sec
Definition: result-inl.h:100
uint32_t target
Definition: result-inl.h:95
Algo algo
Definition: result-inl.h:96
size_t sizeof_key
Definition: result-inl.h:101
Dist dist
Definition: result-inl.h:97
void Print() const
Definition: result-inl.h:85
std::string key_name
Definition: result-inl.h:102
size_t num_threads
Definition: result-inl.h:99
Result()
Definition: result-inl.h:73
Result(const Algo algo, Dist dist, size_t num_keys, size_t num_threads, double sec, size_t sizeof_key, const std::string &key_name)
Definition: result-inl.h:74
size_t num_keys
Definition: result-inl.h:98
Definition: result-inl.h:32
double t
Definition: result-inl.h:34
Timestamp()
Definition: result-inl.h:33