Grok 10.0.1
cache_control.h
Go to the documentation of this file.
1// Copyright 2020 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16#ifndef HIGHWAY_HWY_CACHE_CONTROL_H_
17#define HIGHWAY_HWY_CACHE_CONTROL_H_
18
19#include <stddef.h>
20#include <stdint.h>
21
22#include "hwy/base.h"
23
24// Requires SSE2; fails to compile on 32-bit Clang 7 (see
25// https://github.com/gperftools/gperftools/issues/946).
26#if !defined(__SSE2__) || (HWY_COMPILER_CLANG && HWY_ARCH_X86_32)
27#undef HWY_DISABLE_CACHE_CONTROL
28#define HWY_DISABLE_CACHE_CONTROL
29#endif
30
31// intrin.h is sufficient on MSVC and already included by base.h.
32#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) && !HWY_COMPILER_MSVC
33#include <emmintrin.h> // SSE2
34#endif
35
36// Windows.h #defines these, which causes infinite recursion. Temporarily
37// undefine them in this header; these functions are anyway deprecated.
38// TODO(janwas): remove when these functions are removed.
39#pragma push_macro("LoadFence")
40#undef LoadFence
41
42namespace hwy {
43
44// Even if N*sizeof(T) is smaller, Stream may write a multiple of this size.
45#define HWY_STREAM_MULTIPLE 16
46
47// The following functions may also require an attribute.
48#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) && !HWY_COMPILER_MSVC
49#define HWY_ATTR_CACHE __attribute__((target("sse2")))
50#else
51#define HWY_ATTR_CACHE
52#endif
53
54// Delays subsequent loads until prior loads are visible. On Intel CPUs, also
55// serves as a full fence (waits for all prior instructions to complete).
56// No effect on non-x86.
57// DEPRECATED due to differing behavior across architectures AND vendors.
58HWY_INLINE HWY_ATTR_CACHE void LoadFence() {
59#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
60 _mm_lfence();
61#endif
62}
63
64// Ensures values written by previous `Stream` calls are visible on the current
65// core. This is NOT sufficient for synchronizing across cores; when `Stream`
66// outputs are to be consumed by other core(s), the producer must publish
67// availability (e.g. via mutex or atomic_flag) after `FlushStream`.
68HWY_INLINE HWY_ATTR_CACHE void FlushStream() {
69#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
70 _mm_sfence();
71#endif
72}
73
74// Optionally begins loading the cache line containing "p" to reduce latency of
75// subsequent actual loads.
76template <typename T>
77HWY_INLINE HWY_ATTR_CACHE void Prefetch(const T* p) {
78#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
79 _mm_prefetch(reinterpret_cast<const char*>(p), _MM_HINT_T0);
80#elif HWY_COMPILER_GCC || HWY_COMPILER_CLANG
81 // Hint=0 (NTA) behavior differs, but skipping outer caches is probably not
82 // desirable, so use the default 3 (keep in caches).
83 __builtin_prefetch(p, /*write=*/0, /*hint=*/3);
84#else
85 (void)p;
86#endif
87}
88
89// Invalidates and flushes the cache line containing "p", if possible.
90HWY_INLINE HWY_ATTR_CACHE void FlushCacheline(const void* p) {
91#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
92 _mm_clflush(p);
93#else
94 (void)p;
95#endif
96}
97
98// When called inside a spin-loop, may reduce power consumption.
99HWY_INLINE HWY_ATTR_CACHE void Pause() {
100#if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL)
101 _mm_pause();
102#endif
103}
104
105} // namespace hwy
106
107// TODO(janwas): remove when these functions are removed. (See above.)
108#pragma pop_macro("LoadFence")
109
110#endif // HIGHWAY_HWY_CACHE_CONTROL_H_
#define HWY_INLINE
Definition: base.h:62
#define HWY_ATTR_CACHE
Definition: cache_control.h:51
Definition: aligned_allocator.h:27
HWY_INLINE HWY_ATTR_CACHE void FlushStream()
Definition: cache_control.h:68
HWY_INLINE HWY_ATTR_CACHE void Prefetch(const T *p)
Definition: cache_control.h:77
HWY_INLINE HWY_ATTR_CACHE void Pause()
Definition: cache_control.h:99
HWY_INLINE HWY_ATTR_CACHE void LoadFence()
Definition: cache_control.h:58
HWY_INLINE HWY_ATTR_CACHE void FlushCacheline(const void *p)
Definition: cache_control.h:90