// libstdc++: simd_detail.h
// Internal macros for the simd implementation -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_

#if __cplusplus >= 201703L

#include <cstddef>
#include <cstdint>

/// @cond undocumented

// Opens namespace std::experimental::parallelism_v2 (parallelism_v2 is an
// inline namespace). The std namespace is annotated with
// _GLIBCXX_VISIBILITY(default) and _GLIBCXX_BEGIN_NAMESPACE_VERSION is
// expanded directly inside it. Neither of those helpers is defined in this
// file, so a header that provides them must already have been included.
// Every use must be matched by one _GLIBCXX_SIMD_END_NAMESPACE.
#define _GLIBCXX_SIMD_BEGIN_NAMESPACE \
  namespace std _GLIBCXX_VISIBILITY(default) \
  { \
    _GLIBCXX_BEGIN_NAMESPACE_VERSION \
    namespace experimental { \
    inline namespace parallelism_v2 {

// Closes what _GLIBCXX_SIMD_BEGIN_NAMESPACE opened, innermost first:
// parallelism_v2, experimental, whatever _GLIBCXX_BEGIN_NAMESPACE_VERSION
// opened (via _GLIBCXX_END_NAMESPACE_VERSION), and finally std.
#define _GLIBCXX_SIMD_END_NAMESPACE \
  } \
  } \
  _GLIBCXX_END_NAMESPACE_VERSION \
  }

// ISA extension detection. The following defines all the _GLIBCXX_SIMD_HAVE_XXX
// macros. Each of them is always defined, to either 1 or 0, so it can be
// tested with #if as well as used in ordinary constant expressions.
// ARM{{{
// NEON is available: the compiler predefines __ARM_NEON.
#if defined __ARM_NEON
#define _GLIBCXX_SIMD_HAVE_NEON 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON 0
#endif
// NEON on an architecture level of at least 8 (__ARM_ARCH >= 8) or on
// AArch64 (__aarch64__ defined).
#if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
#define _GLIBCXX_SIMD_HAVE_NEON_A32 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A32 0
#endif
// NEON on AArch64 only.
#if defined __ARM_NEON && defined __aarch64__
#define _GLIBCXX_SIMD_HAVE_NEON_A64 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#endif
//}}}
// x86{{{
// One _GLIBCXX_SIMD_HAVE_<ISA> macro per instruction set extension, derived
// from the feature macro the compiler predefines for it. Each macro is
// always defined, to either 1 or 0.
#ifdef __MMX__
#define _GLIBCXX_SIMD_HAVE_MMX 1
#else
#define _GLIBCXX_SIMD_HAVE_MMX 0
#endif
// SSE and SSE2 are additionally taken as present whenever __x86_64__ is
// defined, even if __SSE__ / __SSE2__ themselves are not.
#if defined __SSE__ || defined __x86_64__
#define _GLIBCXX_SIMD_HAVE_SSE 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE 0
#endif
#if defined __SSE2__ || defined __x86_64__
#define _GLIBCXX_SIMD_HAVE_SSE2 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE2 0
#endif
#ifdef __SSE3__
#define _GLIBCXX_SIMD_HAVE_SSE3 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE3 0
#endif
#ifdef __SSSE3__
#define _GLIBCXX_SIMD_HAVE_SSSE3 1
#else
#define _GLIBCXX_SIMD_HAVE_SSSE3 0
#endif
#ifdef __SSE4_1__
#define _GLIBCXX_SIMD_HAVE_SSE4_1 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4_1 0
#endif
#ifdef __SSE4_2__
#define _GLIBCXX_SIMD_HAVE_SSE4_2 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4_2 0
#endif
#ifdef __XOP__
#define _GLIBCXX_SIMD_HAVE_XOP 1
#else
#define _GLIBCXX_SIMD_HAVE_XOP 0
#endif
#ifdef __AVX__
#define _GLIBCXX_SIMD_HAVE_AVX 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX 0
#endif
#ifdef __AVX2__
#define _GLIBCXX_SIMD_HAVE_AVX2 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX2 0
#endif
// Note the naming: the compiler macro is __BMI__, the library macro is
// ..._HAVE_BMI1.
#ifdef __BMI__
#define _GLIBCXX_SIMD_HAVE_BMI1 1
#else
#define _GLIBCXX_SIMD_HAVE_BMI1 0
#endif
#ifdef __BMI2__
#define _GLIBCXX_SIMD_HAVE_BMI2 1
#else
#define _GLIBCXX_SIMD_HAVE_BMI2 0
#endif
#ifdef __LZCNT__
#define _GLIBCXX_SIMD_HAVE_LZCNT 1
#else
#define _GLIBCXX_SIMD_HAVE_LZCNT 0
#endif
#ifdef __SSE4A__
#define _GLIBCXX_SIMD_HAVE_SSE4A 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4A 0
#endif
#ifdef __FMA__
#define _GLIBCXX_SIMD_HAVE_FMA 1
#else
#define _GLIBCXX_SIMD_HAVE_FMA 0
#endif
#ifdef __FMA4__
#define _GLIBCXX_SIMD_HAVE_FMA4 1
#else
#define _GLIBCXX_SIMD_HAVE_FMA4 0
#endif
#ifdef __F16C__
#define _GLIBCXX_SIMD_HAVE_F16C 1
#else
#define _GLIBCXX_SIMD_HAVE_F16C 0
#endif
#ifdef __POPCNT__
#define _GLIBCXX_SIMD_HAVE_POPCNT 1
#else
#define _GLIBCXX_SIMD_HAVE_POPCNT 0
#endif
#ifdef __AVX512F__
#define _GLIBCXX_SIMD_HAVE_AVX512F 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512F 0
#endif
#ifdef __AVX512DQ__
#define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
#endif
#ifdef __AVX512VL__
#define _GLIBCXX_SIMD_HAVE_AVX512VL 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512VL 0
#endif
#ifdef __AVX512BW__
#define _GLIBCXX_SIMD_HAVE_AVX512BW 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512BW 0
#endif

// ABI availability. Each macro mirrors exactly one feature macro from above:
//   SSE_ABI    <- SSE        FULL_SSE_ABI    <- SSE2
//   AVX_ABI    <- AVX        FULL_AVX_ABI    <- AVX2
//   AVX512_ABI <- AVX512F    FULL_AVX512_ABI <- AVX512BW
#if _GLIBCXX_SIMD_HAVE_SSE
#define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_SSE2
#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX
#define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX2
#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX512F
#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX512BW
#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
#endif

// Sanity check. _GLIBCXX_SIMD_HAVE_SSE2 is set to 1 above whenever
// __x86_64__ is defined, so this can only fire if that definition changes.
#if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
#error "Use of SSE2 is required on AMD64"
#endif
//}}}

// Function annotations and branch hints.

// _GLIBCXX_SIMD_NORMAL_MATH: for non-clang compilers, attaches the optimize
// options "finite-math-only,no-signed-zeros" to the annotated function.
// Expands to nothing when __clang__ is defined.
#ifdef __clang__
#define _GLIBCXX_SIMD_NORMAL_MATH
#else
#define _GLIBCXX_SIMD_NORMAL_MATH \
  [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
#endif
// The annotated function is never inlined.
#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
// Forced inlining plus the `artificial` attribute.
#define _GLIBCXX_SIMD_INTRINSIC \
  [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
// Forced inlining without the `artificial` attribute.
#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
// Branch prediction hints: the value of __x is expected to be 0 (unlikely)
// or 1 (likely). Both expand to __builtin_expect and yield __x itself.
#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)

// constexpr selection.
// When __STRICT_ANSI__ is defined and non-zero, _GLIBCXX_SIMD_CONSTEXPR
// expands to nothing and _GLIBCXX_SIMD_USE_CONSTEXPR_API to `const`;
// otherwise both expand to `constexpr`.
#if defined __STRICT_ANSI__ && __STRICT_ANSI__
#define _GLIBCXX_SIMD_CONSTEXPR
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
#else
#define _GLIBCXX_SIMD_CONSTEXPR constexpr
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
#endif

// _GLIBCXX_SIMD_USE_CONSTEXPR is `const` when __clang__ is defined and
// `constexpr` for every other compiler.
#if defined __clang__
#define _GLIBCXX_SIMD_USE_CONSTEXPR const
#else
#define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
#endif

// Operator lists. Each _GLIBCXX_SIMD_LIST_* macro invokes the function-like
// macro passed as __macro once per operator token, in the order shown:
//   BINARY:      |  &  ^
//   SHIFTS:      << >>
//   ARITHMETICS: +  -  *  /  %
#define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^)
#define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>)
#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) \
  __macro(+) __macro(-) __macro(*) __macro(/) __macro(%)

// Same expansions followed by `static_assert(true)`, so the invocation is
// written with a terminating semicolon at the point of use.
#define _GLIBCXX_SIMD_ALL_BINARY(__macro) \
  _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_SHIFTS(__macro) \
  _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro) \
  _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true)

// Opt-out switch: when _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined,
// _GLIBCXX_SIMD_ALWAYS_INLINE and _GLIBCXX_SIMD_INTRINSIC are redefined to
// plain `inline`, dropping their attributes.
#ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
#undef _GLIBCXX_SIMD_ALWAYS_INLINE
#define _GLIBCXX_SIMD_ALWAYS_INLINE inline
#undef _GLIBCXX_SIMD_INTRINSIC
#define _GLIBCXX_SIMD_INTRINSIC inline
#endif

// _GLIBCXX_SIMD_X86INTRIN is 1 when either _GLIBCXX_SIMD_HAVE_SSE or
// _GLIBCXX_SIMD_HAVE_MMX is non-zero, and 0 otherwise.
#if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX
#define _GLIBCXX_SIMD_X86INTRIN 1
#else
#define _GLIBCXX_SIMD_X86INTRIN 0
#endif

// workaround macros {{{
// Switches for compiler workarounds and Parallelism TS fixes. The
// unconditional ones are defined to 1; the conditional ones are defined to 1
// when their condition holds and are left undefined otherwise.

// use aliasing loads to help GCC understand the data accesses better
// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
// fixed_size_simd<float, 16> x.
#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1

// vector conversions on x86 not optimized:
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
#endif

// integer division not optimized
// (enabled for every compiler that does not define __clang__)
#ifndef __clang__
#define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
#endif

// very bad codegen for extraction and concatenation of 128/256 "subregisters"
// with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
#endif

// bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>
#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1

// bad codegen for zero-extend using simple concat(__x, 0)
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
#endif

// https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
// of static_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1

// https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
// constraint on (static)_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
// }}}

/// @endcond

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_

// vim: foldmethod=marker