libstdc++
simd_builtin.h
1 // Simd Abi specific implementations -*- C++ -*-
2 
3 // Copyright (C) 2020-2021 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_
27 
28 #if __cplusplus >= 201703L
29 
30 #include <array>
31 #include <cmath>
32 #include <cstdlib>
33 
34 _GLIBCXX_SIMD_BEGIN_NAMESPACE
35 // _S_allbits{{{
36 template <typename _V>
37  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_allbits
38  = reinterpret_cast<_V>(~__vector_type_t<char, sizeof(_V) / sizeof(char)>());
39 
40 // }}}
41 // _S_signmask, _S_absmask{{{
42 template <typename _V, typename = _VectorTraits<_V>>
43  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_signmask
44  = __xor(_V() + 1, _V() - 1);
45 
46 template <typename _V, typename = _VectorTraits<_V>>
47  static inline _GLIBCXX_SIMD_USE_CONSTEXPR _V _S_absmask
48  = __andnot(_S_signmask<_V>, _S_allbits<_V>);
49 
50 //}}}
51 // __vector_permute<Indices...>{{{
52 // Index == -1 requests zeroing of the output element
53 template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
54  _Tp
55  __vector_permute(_Tp __x)
56  {
57  static_assert(sizeof...(_Indices) == _TVT::_S_full_size);
58  return __make_vector<typename _TVT::value_type>(
59  (_Indices == -1 ? 0 : __x[_Indices == -1 ? 0 : _Indices])...);
60  }
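// Illustrative example (not part of the upstream sources): with a
// hypothetical 4-element int vector
//   __vector_type_t<int, 4> __x = {10, 20, 30, 40};
// __vector_permute<3, 0, -1, 1>(__x) yields {40, 10, 0, 20}: each output
// element selects the input element named by its index, and -1 zeros it.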
61 
62 // }}}
63 // __vector_shuffle<Indices...>{{{
64 // Index == -1 requests zeroing of the output element
65 template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
66  _Tp
67  __vector_shuffle(_Tp __x, _Tp __y)
68  {
69  return _Tp{(_Indices == -1 ? 0
70  : _Indices < _TVT::_S_full_size
71  ? __x[_Indices]
72  : __y[_Indices - _TVT::_S_full_size])...};
73  }
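// Illustrative example (not part of the upstream sources): indices below
// _S_full_size select from __x, larger indices select from __y, and -1
// zeros the element. With hypothetical inputs
//   __vector_type_t<int, 4> __x = {1, 2, 3, 4}, __y = {5, 6, 7, 8};
// __vector_shuffle<0, 4, 7, -1>(__x, __y) yields {1, 5, 8, 0}.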
74 
75 // }}}
76 // __make_wrapper{{{
77 template <typename _Tp, typename... _Args>
78  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, sizeof...(_Args)>
79  __make_wrapper(const _Args&... __args)
80  { return __make_vector<_Tp>(__args...); }
81 
82 // }}}
83 // __wrapper_bitcast{{{
84 template <typename _Tp, size_t _ToN = 0, typename _Up, size_t _M,
85  size_t _Np = _ToN != 0 ? _ToN : sizeof(_Up) * _M / sizeof(_Tp)>
86  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _Np>
87  __wrapper_bitcast(_SimdWrapper<_Up, _M> __x)
88  {
89  static_assert(_Np > 1);
90  return __intrin_bitcast<__vector_type_t<_Tp, _Np>>(__x._M_data);
91  }
92 
93 // }}}
94 // __shift_elements_right{{{
95 // if (__shift % 2ⁿ == 0) => the low n Bytes are correct
96 template <unsigned __shift, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
97  _GLIBCXX_SIMD_INTRINSIC _Tp
98  __shift_elements_right(_Tp __v)
99  {
100  [[maybe_unused]] const auto __iv = __to_intrin(__v);
101  static_assert(__shift <= sizeof(_Tp));
102  if constexpr (__shift == 0)
103  return __v;
104  else if constexpr (__shift == sizeof(_Tp))
105  return _Tp();
106 #if _GLIBCXX_SIMD_X86INTRIN // {{{
107  else if constexpr (__have_sse && __shift == 8
108  && _TVT::template _S_is<float, 4>)
109  return _mm_movehl_ps(__iv, __iv);
110  else if constexpr (__have_sse2 && __shift == 8
111  && _TVT::template _S_is<double, 2>)
112  return _mm_unpackhi_pd(__iv, __iv);
113  else if constexpr (__have_sse2 && sizeof(_Tp) == 16)
114  return reinterpret_cast<typename _TVT::type>(
115  _mm_srli_si128(reinterpret_cast<__m128i>(__iv), __shift));
116  else if constexpr (__shift == 16 && sizeof(_Tp) == 32)
117  {
118  /*if constexpr (__have_avx && _TVT::template _S_is<double, 4>)
119  return _mm256_permute2f128_pd(__iv, __iv, 0x81);
120  else if constexpr (__have_avx && _TVT::template _S_is<float, 8>)
121  return _mm256_permute2f128_ps(__iv, __iv, 0x81);
122  else if constexpr (__have_avx)
123  return reinterpret_cast<typename _TVT::type>(
124  _mm256_permute2f128_si256(__iv, __iv, 0x81));
125  else*/
126  return __zero_extend(__hi128(__v));
127  }
128  else if constexpr (__have_avx2 && sizeof(_Tp) == 32 && __shift < 16)
129  {
130  const auto __vll = __vector_bitcast<_LLong>(__v);
131  return reinterpret_cast<typename _TVT::type>(
132  _mm256_alignr_epi8(_mm256_permute2x128_si256(__vll, __vll, 0x81),
133  __vll, __shift));
134  }
135  else if constexpr (__have_avx && sizeof(_Tp) == 32 && __shift < 16)
136  {
137  const auto __vll = __vector_bitcast<_LLong>(__v);
138  return reinterpret_cast<typename _TVT::type>(
139  __concat(_mm_alignr_epi8(__hi128(__vll), __lo128(__vll), __shift),
140  _mm_srli_si128(__hi128(__vll), __shift)));
141  }
142  else if constexpr (sizeof(_Tp) == 32 && __shift > 16)
143  return __zero_extend(__shift_elements_right<__shift - 16>(__hi128(__v)));
144  else if constexpr (sizeof(_Tp) == 64 && __shift == 32)
145  return __zero_extend(__hi256(__v));
146  else if constexpr (__have_avx512f && sizeof(_Tp) == 64)
147  {
148  if constexpr (__shift >= 48)
149  return __zero_extend(
150  __shift_elements_right<__shift - 48>(__extract<3, 4>(__v)));
151  else if constexpr (__shift >= 32)
152  return __zero_extend(
153  __shift_elements_right<__shift - 32>(__hi256(__v)));
154  else if constexpr (__shift % 8 == 0)
155  return reinterpret_cast<typename _TVT::type>(
156  _mm512_alignr_epi64(__m512i(), __intrin_bitcast<__m512i>(__v),
157  __shift / 8));
158  else if constexpr (__shift % 4 == 0)
159  return reinterpret_cast<typename _TVT::type>(
160  _mm512_alignr_epi32(__m512i(), __intrin_bitcast<__m512i>(__v),
161  __shift / 4));
162  else if constexpr (__have_avx512bw && __shift < 16)
163  {
164  const auto __vll = __vector_bitcast<_LLong>(__v);
165  return reinterpret_cast<typename _TVT::type>(
166  _mm512_alignr_epi8(_mm512_shuffle_i32x4(__vll, __vll, 0xf9),
167  __vll, __shift));
168  }
169  else if constexpr (__have_avx512bw && __shift < 32)
170  {
171  const auto __vll = __vector_bitcast<_LLong>(__v);
172  return reinterpret_cast<typename _TVT::type>(
173  _mm512_alignr_epi8(_mm512_shuffle_i32x4(__vll, __m512i(), 0xee),
174  _mm512_shuffle_i32x4(__vll, __vll, 0xf9),
175  __shift - 16));
176  }
177  else
178  __assert_unreachable<_Tp>();
179  }
180  /*
181  } else if constexpr (__shift % 16 == 0 && sizeof(_Tp) == 64)
182  return __auto_bitcast(__extract<__shift / 16, 4>(__v));
183  */
184 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
185  else
186  {
187  constexpr int __chunksize = __shift % 8 == 0 ? 8
188  : __shift % 4 == 0 ? 4
189  : __shift % 2 == 0 ? 2
190  : 1;
191  auto __w = __vector_bitcast<__int_with_sizeof_t<__chunksize>>(__v);
192  using _Up = decltype(__w);
193  return __intrin_bitcast<_Tp>(
194  __call_with_n_evaluations<(sizeof(_Tp) - __shift) / __chunksize>(
195  [](auto... __chunks) { return _Up{__chunks...}; },
196  [&](auto __i) { return __w[__shift / __chunksize + __i]; }));
197  }
198  }
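// Illustrative example (not part of the upstream sources): __shift counts
// bytes, not elements, and zeros are shifted in from the top. For a
// hypothetical __vector_type_t<int, 4> __v = {1, 2, 3, 4},
// __shift_elements_right<8>(__v) yields {3, 4, 0, 0}.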
199 
200 // }}}
201 // __extract_part(_SimdWrapper<_Tp, _Np>) {{{
202 template <int _Index, int _Total, int _Combine, typename _Tp, size_t _Np>
203  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST
204  _SimdWrapper<_Tp, _Np / _Total * _Combine>
205  __extract_part(const _SimdWrapper<_Tp, _Np> __x)
206  {
207  if constexpr (_Index % 2 == 0 && _Total % 2 == 0 && _Combine % 2 == 0)
208  return __extract_part<_Index / 2, _Total / 2, _Combine / 2>(__x);
209  else
210  {
211  constexpr size_t __values_per_part = _Np / _Total;
212  constexpr size_t __values_to_skip = _Index * __values_per_part;
213  constexpr size_t __return_size = __values_per_part * _Combine;
214  using _R = __vector_type_t<_Tp, __return_size>;
215  static_assert((_Index + _Combine) * __values_per_part * sizeof(_Tp)
216  <= sizeof(__x),
217  "out of bounds __extract_part");
 218  // the following assertion would ensure that no "padding" is read
219  // static_assert(_Total >= _Index + _Combine, "_Total must be greater
220  // than _Index");
221 
222  // static_assert(__return_size * _Total == _Np, "_Np must be divisible
223  // by _Total");
224  if (__x._M_is_constprop())
225  return __generate_from_n_evaluations<__return_size, _R>(
226  [&](auto __i) { return __x[__values_to_skip + __i]; });
227  if constexpr (_Index == 0 && _Total == 1)
228  return __x;
229  else if constexpr (_Index == 0)
230  return __intrin_bitcast<_R>(__as_vector(__x));
231 #if _GLIBCXX_SIMD_X86INTRIN // {{{
232  else if constexpr (sizeof(__x) == 32
233  && __return_size * sizeof(_Tp) <= 16)
234  {
235  constexpr size_t __bytes_to_skip = __values_to_skip * sizeof(_Tp);
236  if constexpr (__bytes_to_skip == 16)
237  return __vector_bitcast<_Tp, __return_size>(
238  __hi128(__as_vector(__x)));
239  else
240  return __vector_bitcast<_Tp, __return_size>(
241  _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
242  __lo128(__vector_bitcast<_LLong>(__x)),
243  __bytes_to_skip));
244  }
245 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
246  else if constexpr (_Index > 0
247  && (__values_to_skip % __return_size != 0
248  || sizeof(_R) >= 8)
249  && (__values_to_skip + __return_size) * sizeof(_Tp)
250  <= 64
251  && sizeof(__x) >= 16)
252  return __intrin_bitcast<_R>(
253  __shift_elements_right<__values_to_skip * sizeof(_Tp)>(
254  __as_vector(__x)));
255  else
256  {
257  _R __r = {};
258  __builtin_memcpy(&__r,
259  reinterpret_cast<const char*>(&__x)
260  + sizeof(_Tp) * __values_to_skip,
261  __return_size * sizeof(_Tp));
262  return __r;
263  }
264  }
265  }
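// Illustrative example (not part of the upstream sources): the wrapper is
// viewed as _Total equally sized parts; _Combine consecutive parts starting
// at part _Index are returned. For a hypothetical _SimdWrapper<int, 8> __x
// holding {0, 1, 2, 3, 4, 5, 6, 7}:
//   __extract_part<1, 4, 1>(__x)  // parts of 2 elements -> {2, 3}
//   __extract_part<1, 4, 2>(__x)  // two combined parts  -> {2, 3, 4, 5}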
266 
267 // }}}
268 // __extract_part(_SimdWrapper<bool, _Np>) {{{
269 template <int _Index, int _Total, int _Combine = 1, size_t _Np>
270  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _Np / _Total * _Combine>
271  __extract_part(const _SimdWrapper<bool, _Np> __x)
272  {
273  static_assert(_Combine == 1, "_Combine != 1 not implemented");
 274  static_assert(__have_avx512f && _Np == _Np); // '_Np == _Np' keeps the condition dependent, deferring the check to instantiation
275  static_assert(_Total >= 2 && _Index + _Combine <= _Total && _Index >= 0);
276  return __x._M_data >> (_Index * _Np / _Total);
277  }
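// Illustrative note (not part of the upstream sources): for bitmasks the
// extraction is a plain right shift. With _Np == 8 and _Total == 4, part
// _Index == 1 covers bits [2, 4), so the function returns __x._M_data >> 2
// and only the low 2 bits of the result are meaningful.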
278 
279 // }}}
280 
281 // __vector_convert {{{
282 // implementation requires an index sequence
283 template <typename _To, typename _From, size_t... _I>
284  _GLIBCXX_SIMD_INTRINSIC constexpr _To
285  __vector_convert(_From __a, index_sequence<_I...>)
286  {
287  using _Tp = typename _VectorTraits<_To>::value_type;
288  return _To{static_cast<_Tp>(__a[_I])...};
289  }
290 
291 template <typename _To, typename _From, size_t... _I>
292  _GLIBCXX_SIMD_INTRINSIC constexpr _To
293  __vector_convert(_From __a, _From __b, index_sequence<_I...>)
294  {
295  using _Tp = typename _VectorTraits<_To>::value_type;
296  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...};
297  }
298 
299 template <typename _To, typename _From, size_t... _I>
300  _GLIBCXX_SIMD_INTRINSIC constexpr _To
301  __vector_convert(_From __a, _From __b, _From __c, index_sequence<_I...>)
302  {
303  using _Tp = typename _VectorTraits<_To>::value_type;
304  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
305  static_cast<_Tp>(__c[_I])...};
306  }
307 
308 template <typename _To, typename _From, size_t... _I>
309  _GLIBCXX_SIMD_INTRINSIC constexpr _To
310  __vector_convert(_From __a, _From __b, _From __c, _From __d,
311  index_sequence<_I...>)
312  {
313  using _Tp = typename _VectorTraits<_To>::value_type;
314  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
315  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...};
316  }
317 
318 template <typename _To, typename _From, size_t... _I>
319  _GLIBCXX_SIMD_INTRINSIC constexpr _To
320  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
321  index_sequence<_I...>)
322  {
323  using _Tp = typename _VectorTraits<_To>::value_type;
324  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
325  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
326  static_cast<_Tp>(__e[_I])...};
327  }
328 
329 template <typename _To, typename _From, size_t... _I>
330  _GLIBCXX_SIMD_INTRINSIC constexpr _To
331  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
332  _From __f, index_sequence<_I...>)
333  {
334  using _Tp = typename _VectorTraits<_To>::value_type;
335  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
336  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
337  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...};
338  }
339 
340 template <typename _To, typename _From, size_t... _I>
341  _GLIBCXX_SIMD_INTRINSIC constexpr _To
342  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
343  _From __f, _From __g, index_sequence<_I...>)
344  {
345  using _Tp = typename _VectorTraits<_To>::value_type;
346  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
347  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
348  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
349  static_cast<_Tp>(__g[_I])...};
350  }
351 
352 template <typename _To, typename _From, size_t... _I>
353  _GLIBCXX_SIMD_INTRINSIC constexpr _To
354  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
355  _From __f, _From __g, _From __h, index_sequence<_I...>)
356  {
357  using _Tp = typename _VectorTraits<_To>::value_type;
358  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
359  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
360  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
361  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...};
362  }
363 
364 template <typename _To, typename _From, size_t... _I>
365  _GLIBCXX_SIMD_INTRINSIC constexpr _To
366  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
367  _From __f, _From __g, _From __h, _From __i,
368  index_sequence<_I...>)
369  {
370  using _Tp = typename _VectorTraits<_To>::value_type;
371  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
372  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
373  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
374  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
375  static_cast<_Tp>(__i[_I])...};
376  }
377 
378 template <typename _To, typename _From, size_t... _I>
379  _GLIBCXX_SIMD_INTRINSIC constexpr _To
380  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
381  _From __f, _From __g, _From __h, _From __i, _From __j,
382  index_sequence<_I...>)
383  {
384  using _Tp = typename _VectorTraits<_To>::value_type;
385  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
386  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
387  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
388  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
389  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...};
390  }
391 
392 template <typename _To, typename _From, size_t... _I>
393  _GLIBCXX_SIMD_INTRINSIC constexpr _To
394  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
395  _From __f, _From __g, _From __h, _From __i, _From __j,
396  _From __k, index_sequence<_I...>)
397  {
398  using _Tp = typename _VectorTraits<_To>::value_type;
399  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
400  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
401  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
402  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
403  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
404  static_cast<_Tp>(__k[_I])...};
405  }
406 
407 template <typename _To, typename _From, size_t... _I>
408  _GLIBCXX_SIMD_INTRINSIC constexpr _To
409  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
410  _From __f, _From __g, _From __h, _From __i, _From __j,
411  _From __k, _From __l, index_sequence<_I...>)
412  {
413  using _Tp = typename _VectorTraits<_To>::value_type;
414  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
415  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
416  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
417  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
418  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
419  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...};
420  }
421 
422 template <typename _To, typename _From, size_t... _I>
423  _GLIBCXX_SIMD_INTRINSIC constexpr _To
424  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
425  _From __f, _From __g, _From __h, _From __i, _From __j,
426  _From __k, _From __l, _From __m, index_sequence<_I...>)
427  {
428  using _Tp = typename _VectorTraits<_To>::value_type;
429  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
430  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
431  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
432  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
433  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
434  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...,
435  static_cast<_Tp>(__m[_I])...};
436  }
437 
438 template <typename _To, typename _From, size_t... _I>
439  _GLIBCXX_SIMD_INTRINSIC constexpr _To
440  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
441  _From __f, _From __g, _From __h, _From __i, _From __j,
442  _From __k, _From __l, _From __m, _From __n,
443  index_sequence<_I...>)
444  {
445  using _Tp = typename _VectorTraits<_To>::value_type;
446  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
447  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
448  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
449  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
450  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
451  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...,
452  static_cast<_Tp>(__m[_I])..., static_cast<_Tp>(__n[_I])...};
453  }
454 
455 template <typename _To, typename _From, size_t... _I>
456  _GLIBCXX_SIMD_INTRINSIC constexpr _To
457  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
458  _From __f, _From __g, _From __h, _From __i, _From __j,
459  _From __k, _From __l, _From __m, _From __n, _From __o,
460  index_sequence<_I...>)
461  {
462  using _Tp = typename _VectorTraits<_To>::value_type;
463  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
464  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
465  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
466  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
467  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
468  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...,
469  static_cast<_Tp>(__m[_I])..., static_cast<_Tp>(__n[_I])...,
470  static_cast<_Tp>(__o[_I])...};
471  }
472 
473 template <typename _To, typename _From, size_t... _I>
474  _GLIBCXX_SIMD_INTRINSIC constexpr _To
475  __vector_convert(_From __a, _From __b, _From __c, _From __d, _From __e,
476  _From __f, _From __g, _From __h, _From __i, _From __j,
477  _From __k, _From __l, _From __m, _From __n, _From __o,
478  _From __p, index_sequence<_I...>)
479  {
480  using _Tp = typename _VectorTraits<_To>::value_type;
481  return _To{static_cast<_Tp>(__a[_I])..., static_cast<_Tp>(__b[_I])...,
482  static_cast<_Tp>(__c[_I])..., static_cast<_Tp>(__d[_I])...,
483  static_cast<_Tp>(__e[_I])..., static_cast<_Tp>(__f[_I])...,
484  static_cast<_Tp>(__g[_I])..., static_cast<_Tp>(__h[_I])...,
485  static_cast<_Tp>(__i[_I])..., static_cast<_Tp>(__j[_I])...,
486  static_cast<_Tp>(__k[_I])..., static_cast<_Tp>(__l[_I])...,
487  static_cast<_Tp>(__m[_I])..., static_cast<_Tp>(__n[_I])...,
488  static_cast<_Tp>(__o[_I])..., static_cast<_Tp>(__p[_I])...};
489  }
490 
491 // Defer the actual conversion to the overload that takes an index sequence.
492 // Note that this function appends zeros or drops values from the end if the
493 // input and output widths do not match.
494 template <typename _To, typename... _From, size_t _FromSize>
495  _GLIBCXX_SIMD_INTRINSIC constexpr _To
496  __vector_convert(_SimdWrapper<_From, _FromSize>... __xs)
497  {
498 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
499  using _From0 = __first_of_pack_t<_From...>;
500  using _FW = _SimdWrapper<_From0, _FromSize>;
501  if (!_FW::_S_is_partial && !(... && __xs._M_is_constprop()))
502  {
503  if constexpr ((sizeof...(_From) & (sizeof...(_From) - 1))
504  == 0) // power-of-two number of arguments
505  return __convert_x86<_To>(__as_vector(__xs)...);
506  else // append zeros and recurse until the above branch is taken
507  return __vector_convert<_To>(__xs..., _FW{});
508  }
509  else
510 #endif
511  return __vector_convert<_To>(
512  __as_vector(__xs)...,
513  make_index_sequence<(sizeof...(__xs) == 1 ? std::min(
514  _VectorTraits<_To>::_S_full_size, int(_FromSize))
515  : _FromSize)>());
516  }
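// Illustrative note (not part of the upstream sources): given three full
// _SimdWrapper<short, 8> arguments, the PR85048 workaround appends one
// zeroed wrapper so that __convert_x86 sees a power-of-two number of
// inputs; without the workaround the three-argument index-sequence
// overload above concatenates the static_cast'ed elements directly.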
517 
518 // }}}
519 // __convert function{{{
520 template <typename _To, typename _From, typename... _More>
521  _GLIBCXX_SIMD_INTRINSIC constexpr auto
522  __convert(_From __v0, _More... __vs)
523  {
524  static_assert((true && ... && is_same_v<_From, _More>) );
525  if constexpr (__is_vectorizable_v<_From>)
526  {
527  using _V = typename _VectorTraits<_To>::type;
528  using _Tp = typename _VectorTraits<_To>::value_type;
529  return _V{static_cast<_Tp>(__v0), static_cast<_Tp>(__vs)...};
530  }
531  else if constexpr (__is_vector_type_v<_From>)
532  return __convert<_To>(__as_wrapper(__v0), __as_wrapper(__vs)...);
533  else // _SimdWrapper arguments
534  {
535  constexpr size_t __input_size = _From::_S_size * (1 + sizeof...(_More));
536  if constexpr (__is_vectorizable_v<_To>)
537  return __convert<__vector_type_t<_To, __input_size>>(__v0, __vs...);
538  else if constexpr (!__is_vector_type_v<_To>)
539  return _To(__convert<typename _To::_BuiltinType>(__v0, __vs...));
540  else
541  {
542  static_assert(
543  sizeof...(_More) == 0
544  || _VectorTraits<_To>::_S_full_size >= __input_size,
545  "__convert(...) requires the input to fit into the output");
546  return __vector_convert<_To>(__v0, __vs...);
547  }
548  }
549  }
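// Illustrative example (not part of the upstream sources):
//   __convert<__vector_type_t<float, 4>>(__make_wrapper<int>(1, 2, 3, 4))
// yields {1.f, 2.f, 3.f, 4.f}; the static_assert above rejects calls where
// the combined input would not fit into the requested _To.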
550 
551 // }}}
552 // __convert_all{{{
553 // Converts __v into array<_To, N>, where N is _NParts if non-zero or
554 // otherwise deduced from _To such that N * #elements(_To) <= #elements(__v).
555 // Note: this function may return fewer than all converted elements.
556 template <typename _To,
557  size_t _NParts = 0, // allows converting fewer or more parts than would
558  // cover all of __v (only the last _To may be partially filled)
559  size_t _Offset = 0, // number of elements (not bytes or parts) to skip
560  // at the start of __v
561  typename _From, typename _FromVT = _VectorTraits<_From>>
562  _GLIBCXX_SIMD_INTRINSIC auto
563  __convert_all(_From __v)
564  {
565  if constexpr (is_arithmetic_v<_To> && _NParts != 1)
566  {
567  static_assert(_Offset < _FromVT::_S_full_size);
568  constexpr auto _Np
569  = _NParts == 0 ? _FromVT::_S_partial_width - _Offset : _NParts;
570  return __generate_from_n_evaluations<_Np, array<_To, _Np>>(
571  [&](auto __i) { return static_cast<_To>(__v[__i + _Offset]); });
572  }
573  else
574  {
575  static_assert(__is_vector_type_v<_To>);
576  using _ToVT = _VectorTraits<_To>;
577  if constexpr (__is_vector_type_v<_From>)
578  return __convert_all<_To, _NParts>(__as_wrapper(__v));
579  else if constexpr (_NParts == 1)
580  {
581  static_assert(_Offset % _ToVT::_S_full_size == 0);
582  return array<_To, 1>{__vector_convert<_To>(
583  __extract_part<_Offset / _ToVT::_S_full_size,
584  __div_roundup(_FromVT::_S_partial_width,
585  _ToVT::_S_full_size)>(__v))};
586  }
587 #if _GLIBCXX_SIMD_X86INTRIN // {{{
588  else if constexpr (!__have_sse4_1 && _Offset == 0
589  && is_integral_v<typename _FromVT::value_type>
590  && sizeof(typename _FromVT::value_type)
591  < sizeof(typename _ToVT::value_type)
592  && !(sizeof(typename _FromVT::value_type) == 4
593  && is_same_v<typename _ToVT::value_type, double>))
594  {
595  using _ToT = typename _ToVT::value_type;
596  using _FromT = typename _FromVT::value_type;
597  constexpr size_t _Np
598  = _NParts != 0
599  ? _NParts
600  : (_FromVT::_S_partial_width / _ToVT::_S_full_size);
601  using _R = array<_To, _Np>;
 602  // __adjust bitcasts its input to a wrapper with the requested number of
 603  // entries (pass a _SizeConstant as the first argument) so that no
 604  // unnecessary intermediate conversions are requested and, more
 605  // importantly, no intermediate conversions are missing
606  [[maybe_unused]] auto __adjust
607  = [](auto __n,
608  auto __vv) -> _SimdWrapper<_FromT, decltype(__n)::value> {
609  return __vector_bitcast<_FromT, decltype(__n)::value>(__vv);
610  };
611  [[maybe_unused]] const auto __vi = __to_intrin(__v);
612  auto&& __make_array = [](auto __x0, [[maybe_unused]] auto __x1) {
613  if constexpr (_Np == 1)
614  return _R{__intrin_bitcast<_To>(__x0)};
615  else
616  return _R{__intrin_bitcast<_To>(__x0),
617  __intrin_bitcast<_To>(__x1)};
618  };
619 
620  if constexpr (_Np == 0)
621  return _R{};
622  else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) == 2)
623  {
624  static_assert(is_integral_v<_FromT>);
625  static_assert(is_integral_v<_ToT>);
626  if constexpr (is_unsigned_v<_FromT>)
627  return __make_array(_mm_unpacklo_epi8(__vi, __m128i()),
628  _mm_unpackhi_epi8(__vi, __m128i()));
629  else
630  return __make_array(
631  _mm_srai_epi16(_mm_unpacklo_epi8(__vi, __vi), 8),
632  _mm_srai_epi16(_mm_unpackhi_epi8(__vi, __vi), 8));
633  }
634  else if constexpr (sizeof(_FromT) == 2 && sizeof(_ToT) == 4)
635  {
636  static_assert(is_integral_v<_FromT>);
637  if constexpr (is_floating_point_v<_ToT>)
638  {
639  const auto __ints
640  = __convert_all<__vector_type16_t<int>, _Np>(
641  __adjust(_SizeConstant<_Np * 4>(), __v));
642  return __generate_from_n_evaluations<_Np, _R>(
643  [&](auto __i) {
644  return __vector_convert<_To>(__as_wrapper(__ints[__i]));
645  });
646  }
647  else if constexpr (is_unsigned_v<_FromT>)
648  return __make_array(_mm_unpacklo_epi16(__vi, __m128i()),
649  _mm_unpackhi_epi16(__vi, __m128i()));
650  else
651  return __make_array(
652  _mm_srai_epi32(_mm_unpacklo_epi16(__vi, __vi), 16),
653  _mm_srai_epi32(_mm_unpackhi_epi16(__vi, __vi), 16));
654  }
655  else if constexpr (sizeof(_FromT) == 4 && sizeof(_ToT) == 8
656  && is_integral_v<_FromT> && is_integral_v<_ToT>)
657  {
658  if constexpr (is_unsigned_v<_FromT>)
659  return __make_array(_mm_unpacklo_epi32(__vi, __m128i()),
660  _mm_unpackhi_epi32(__vi, __m128i()));
661  else
662  return __make_array(
663  _mm_unpacklo_epi32(__vi, _mm_srai_epi32(__vi, 31)),
664  _mm_unpackhi_epi32(__vi, _mm_srai_epi32(__vi, 31)));
665  }
677  else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) >= 4
678  && is_signed_v<_FromT>)
679  {
680  const __m128i __vv[2] = {_mm_unpacklo_epi8(__vi, __vi),
681  _mm_unpackhi_epi8(__vi, __vi)};
682  const __vector_type_t<int, 4> __vvvv[4] = {
683  __vector_bitcast<int>(_mm_unpacklo_epi16(__vv[0], __vv[0])),
684  __vector_bitcast<int>(_mm_unpackhi_epi16(__vv[0], __vv[0])),
685  __vector_bitcast<int>(_mm_unpacklo_epi16(__vv[1], __vv[1])),
686  __vector_bitcast<int>(_mm_unpackhi_epi16(__vv[1], __vv[1]))};
687  if constexpr (sizeof(_ToT) == 4)
688  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
689  return __vector_convert<_To>(
690  _SimdWrapper<int, 4>(__vvvv[__i] >> 24));
691  });
692  else if constexpr (is_integral_v<_ToT>)
693  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
694  const auto __signbits = __to_intrin(__vvvv[__i / 2] >> 31);
695  const auto __sx32 = __to_intrin(__vvvv[__i / 2] >> 24);
696  return __vector_bitcast<_ToT>(
697  __i % 2 == 0 ? _mm_unpacklo_epi32(__sx32, __signbits)
698  : _mm_unpackhi_epi32(__sx32, __signbits));
699  });
700  else
701  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
702  const _SimdWrapper<int, 4> __int4 = __vvvv[__i / 2] >> 24;
703  return __vector_convert<_To>(
704  __i % 2 == 0 ? __int4
705  : _SimdWrapper<int, 4>(
706  _mm_unpackhi_epi64(__to_intrin(__int4),
707  __to_intrin(__int4))));
708  });
709  }
710  else if constexpr (sizeof(_FromT) == 1 && sizeof(_ToT) == 4)
711  {
712  const auto __shorts = __convert_all<__vector_type16_t<
713  conditional_t<is_signed_v<_FromT>, short, unsigned short>>>(
714  __adjust(_SizeConstant<(_Np + 1) / 2 * 8>(), __v));
715  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
716  return __convert_all<_To>(__shorts[__i / 2])[__i % 2];
717  });
718  }
719  else if constexpr (sizeof(_FromT) == 2 && sizeof(_ToT) == 8
720  && is_signed_v<_FromT> && is_integral_v<_ToT>)
721  {
722  const __m128i __vv[2] = {_mm_unpacklo_epi16(__vi, __vi),
723  _mm_unpackhi_epi16(__vi, __vi)};
724  const __vector_type16_t<int> __vvvv[4]
725  = {__vector_bitcast<int>(
726  _mm_unpacklo_epi32(_mm_srai_epi32(__vv[0], 16),
727  _mm_srai_epi32(__vv[0], 31))),
728  __vector_bitcast<int>(
729  _mm_unpackhi_epi32(_mm_srai_epi32(__vv[0], 16),
730  _mm_srai_epi32(__vv[0], 31))),
731  __vector_bitcast<int>(
732  _mm_unpacklo_epi32(_mm_srai_epi32(__vv[1], 16),
733  _mm_srai_epi32(__vv[1], 31))),
734  __vector_bitcast<int>(
735  _mm_unpackhi_epi32(_mm_srai_epi32(__vv[1], 16),
736  _mm_srai_epi32(__vv[1], 31)))};
737  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
738  return __vector_bitcast<_ToT>(__vvvv[__i]);
739  });
740  }
741  else if constexpr (sizeof(_FromT) <= 2 && sizeof(_ToT) == 8)
742  {
743  const auto __ints
744  = __convert_all<__vector_type16_t<conditional_t<
745  is_signed_v<_FromT> || is_floating_point_v<_ToT>, int,
746  unsigned int>>>(
747  __adjust(_SizeConstant<(_Np + 1) / 2 * 4>(), __v));
748  return __generate_from_n_evaluations<_Np, _R>([&](auto __i) {
749  return __convert_all<_To>(__ints[__i / 2])[__i % 2];
750  });
751  }
752  else
753  __assert_unreachable<_To>();
754  }
755 #endif // _GLIBCXX_SIMD_X86INTRIN }}}
756  else if constexpr ((_FromVT::_S_partial_width - _Offset)
757  > _ToVT::_S_full_size)
758  {
759  /*
760  static_assert(
761  (_FromVT::_S_partial_width & (_FromVT::_S_partial_width - 1)) ==
762  0,
763  "__convert_all only supports power-of-2 number of elements.
764  Otherwise " "the return type cannot be array<_To, N>.");
765  */
766  constexpr size_t _NTotal
767  = (_FromVT::_S_partial_width - _Offset) / _ToVT::_S_full_size;
768  constexpr size_t _Np = _NParts == 0 ? _NTotal : _NParts;
769  static_assert(
770  _Np <= _NTotal
771  || (_Np == _NTotal + 1
772  && (_FromVT::_S_partial_width - _Offset) % _ToVT::_S_full_size
773  > 0));
774  using _R = array<_To, _Np>;
775  if constexpr (_Np == 1)
776  return _R{__vector_convert<_To>(
777  __extract_part<_Offset, _FromVT::_S_partial_width,
778  _ToVT::_S_full_size>(__v))};
779  else
780  return __generate_from_n_evaluations<_Np, _R>([&](
781  auto __i) constexpr {
782  auto __part
783  = __extract_part<__i * _ToVT::_S_full_size + _Offset,
784  _FromVT::_S_partial_width,
785  _ToVT::_S_full_size>(__v);
786  return __vector_convert<_To>(__part);
787  });
788  }
789  else if constexpr (_Offset == 0)
790  return array<_To, 1>{__vector_convert<_To>(__v)};
791  else
792  return array<_To, 1>{__vector_convert<_To>(
793  __extract_part<_Offset, _FromVT::_S_partial_width,
794  _FromVT::_S_partial_width - _Offset>(__v))};
795  }
796  }
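// Illustrative example (not part of the upstream sources): widening a full
// 16-element char vector (hypothetical __v16char) to int via
//   __convert_all<__vector_type_t<int, 4>>(__v16char)
// returns array<__vector_type_t<int, 4>, 4>; _NParts and _Offset restrict
// the conversion to a subset of the input parts.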
797 
798 // }}}
799 
800 // _GnuTraits {{{
801 template <typename _Tp, typename _Mp, typename _Abi, size_t _Np>
802  struct _GnuTraits
803  {
804  using _IsValid = true_type;
805  using _SimdImpl = typename _Abi::_SimdImpl;
806  using _MaskImpl = typename _Abi::_MaskImpl;
807 
808  // simd and simd_mask member types {{{
809  using _SimdMember = _SimdWrapper<_Tp, _Np>;
810  using _MaskMember = _SimdWrapper<_Mp, _Np>;
811  static constexpr size_t _S_simd_align = alignof(_SimdMember);
812  static constexpr size_t _S_mask_align = alignof(_MaskMember);
813 
814  // }}}
815  // size metadata {{{
816  static constexpr size_t _S_full_size = _SimdMember::_S_full_size;
817  static constexpr bool _S_is_partial = _SimdMember::_S_is_partial;
818 
819  // }}}
820  // _SimdBase / base class for simd, providing extra conversions {{{
821  struct _SimdBase2
822  {
823  explicit operator __intrinsic_type_t<_Tp, _Np>() const
824  {
825  return __to_intrin(static_cast<const simd<_Tp, _Abi>*>(this)->_M_data);
826  }
827  explicit operator __vector_type_t<_Tp, _Np>() const
828  {
829  return static_cast<const simd<_Tp, _Abi>*>(this)->_M_data.__builtin();
830  }
831  };
832 
833  struct _SimdBase1
834  {
835  explicit operator __intrinsic_type_t<_Tp, _Np>() const
836  { return __data(*static_cast<const simd<_Tp, _Abi>*>(this)); }
837  };
838 
839  using _SimdBase = conditional_t<
840  is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value,
841  _SimdBase1, _SimdBase2>;
842 
843  // }}}
844  // _MaskBase {{{
845  struct _MaskBase2
846  {
847  explicit operator __intrinsic_type_t<_Tp, _Np>() const
848  {
849  return static_cast<const simd_mask<_Tp, _Abi>*>(this)
850  ->_M_data.__intrin();
851  }
852  explicit operator __vector_type_t<_Tp, _Np>() const
853  {
854  return static_cast<const simd_mask<_Tp, _Abi>*>(this)->_M_data._M_data;
855  }
856  };
857 
858  struct _MaskBase1
859  {
860  explicit operator __intrinsic_type_t<_Tp, _Np>() const
861  { return __data(*static_cast<const simd_mask<_Tp, _Abi>*>(this)); }
862  };
863 
864  using _MaskBase = conditional_t<
865  is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value,
866  _MaskBase1, _MaskBase2>;
867 
868  // }}}
869  // _MaskCastType {{{
870  // parameter type of one explicit simd_mask constructor
871  class _MaskCastType
872  {
873  using _Up = __intrinsic_type_t<_Tp, _Np>;
874  _Up _M_data;
875 
876  public:
877  _MaskCastType(_Up __x) : _M_data(__x) {}
878  operator _MaskMember() const { return _M_data; }
879  };
880 
881  // }}}
882  // _SimdCastType {{{
883  // parameter type of one explicit simd constructor
884  class _SimdCastType1
885  {
886  using _Ap = __intrinsic_type_t<_Tp, _Np>;
887  _SimdMember _M_data;
888 
889  public:
890  _SimdCastType1(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {}
891  operator _SimdMember() const { return _M_data; }
892  };
893 
894  class _SimdCastType2
895  {
896  using _Ap = __intrinsic_type_t<_Tp, _Np>;
897  using _Bp = __vector_type_t<_Tp, _Np>;
898  _SimdMember _M_data;
899 
900  public:
901  _SimdCastType2(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {}
902  _SimdCastType2(_Bp __b) : _M_data(__b) {}
903  operator _SimdMember() const { return _M_data; }
904  };
905 
906  using _SimdCastType = conditional_t<
907  is_same<__intrinsic_type_t<_Tp, _Np>, __vector_type_t<_Tp, _Np>>::value,
908  _SimdCastType1, _SimdCastType2>;
909  //}}}
910  };
911 
912 // }}}
913 struct _CommonImplX86;
914 struct _CommonImplNeon;
915 struct _CommonImplBuiltin;
916 template <typename _Abi> struct _SimdImplBuiltin;
917 template <typename _Abi> struct _MaskImplBuiltin;
918 template <typename _Abi> struct _SimdImplX86;
919 template <typename _Abi> struct _MaskImplX86;
920 template <typename _Abi> struct _SimdImplNeon;
921 template <typename _Abi> struct _MaskImplNeon;
922 template <typename _Abi> struct _SimdImplPpc;
923 template <typename _Abi> struct _MaskImplPpc;
924 
925 // simd_abi::_VecBuiltin {{{
926 template <int _UsedBytes>
927  struct simd_abi::_VecBuiltin
928  {
929  template <typename _Tp>
930  static constexpr size_t _S_size = _UsedBytes / sizeof(_Tp);
931 
932  // validity traits {{{
933  struct _IsValidAbiTag : __bool_constant<(_UsedBytes > 1)> {};
934 
935  template <typename _Tp>
936  struct _IsValidSizeFor
937  : __bool_constant<(_UsedBytes / sizeof(_Tp) > 1
938  && _UsedBytes % sizeof(_Tp) == 0
939  && _UsedBytes <= __vectorized_sizeof<_Tp>()
940  && (!__have_avx512f || _UsedBytes <= 32))> {};
941 
942  template <typename _Tp>
943  struct _IsValid : conjunction<_IsValidAbiTag, __is_vectorizable<_Tp>,
944  _IsValidSizeFor<_Tp>> {};
945 
946  template <typename _Tp>
947  static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;
948 
949  // }}}
950  // _SimdImpl/_MaskImpl {{{
951 #if _GLIBCXX_SIMD_X86INTRIN
952  using _CommonImpl = _CommonImplX86;
953  using _SimdImpl = _SimdImplX86<_VecBuiltin<_UsedBytes>>;
954  using _MaskImpl = _MaskImplX86<_VecBuiltin<_UsedBytes>>;
955 #elif _GLIBCXX_SIMD_HAVE_NEON
956  using _CommonImpl = _CommonImplNeon;
957  using _SimdImpl = _SimdImplNeon<_VecBuiltin<_UsedBytes>>;
958  using _MaskImpl = _MaskImplNeon<_VecBuiltin<_UsedBytes>>;
959 #else
960  using _CommonImpl = _CommonImplBuiltin;
961 #ifdef __ALTIVEC__
962  using _SimdImpl = _SimdImplPpc<_VecBuiltin<_UsedBytes>>;
963  using _MaskImpl = _MaskImplPpc<_VecBuiltin<_UsedBytes>>;
964 #else
965  using _SimdImpl = _SimdImplBuiltin<_VecBuiltin<_UsedBytes>>;
966  using _MaskImpl = _MaskImplBuiltin<_VecBuiltin<_UsedBytes>>;
967 #endif
968 #endif
969 
970  // }}}
971  // __traits {{{
972  template <typename _Tp>
973  using _MaskValueType = __int_for_sizeof_t<_Tp>;
974 
975  template <typename _Tp>
976  using __traits
977  = conditional_t<_S_is_valid_v<_Tp>,
978  _GnuTraits<_Tp, _MaskValueType<_Tp>,
979  _VecBuiltin<_UsedBytes>, _S_size<_Tp>>,
980  _InvalidTraits>;
981 
982  //}}}
983  // size metadata {{{
984  template <typename _Tp>
985  static constexpr size_t _S_full_size = __traits<_Tp>::_S_full_size;
986 
987  template <typename _Tp>
988  static constexpr bool _S_is_partial = __traits<_Tp>::_S_is_partial;
989 
990  // }}}
991  // implicit masks {{{
992  template <typename _Tp>
993  using _MaskMember = _SimdWrapper<_MaskValueType<_Tp>, _S_size<_Tp>>;
994 
995  template <typename _Tp>
996  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
997  _S_implicit_mask()
998  {
999  using _UV = typename _MaskMember<_Tp>::_BuiltinType;
1000  if constexpr (!_MaskMember<_Tp>::_S_is_partial)
1001  return ~_UV();
1002  else
1003  {
1004  constexpr auto __size = _S_size<_Tp>;
1005  _GLIBCXX_SIMD_USE_CONSTEXPR auto __r = __generate_vector<_UV>(
1006  [](auto __i) constexpr { return __i < __size ? -1 : 0; });
1007  return __r;
1008  }
1009  }
1010 
1011  template <typename _Tp>
1012  _GLIBCXX_SIMD_INTRINSIC static constexpr __intrinsic_type_t<_Tp,
1013  _S_size<_Tp>>
1014  _S_implicit_mask_intrin()
1015  {
1016  return __to_intrin(
1017  __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>()._M_data));
1018  }
1019 
1020  template <typename _TW, typename _TVT = _VectorTraits<_TW>>
1021  _GLIBCXX_SIMD_INTRINSIC static constexpr _TW _S_masked(_TW __x)
1022  {
1023  using _Tp = typename _TVT::value_type;
1024  if constexpr (!_MaskMember<_Tp>::_S_is_partial)
1025  return __x;
1026  else
1027  return __and(__as_vector(__x),
1028  __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>()));
1029  }
1030 
1031  template <typename _TW, typename _TVT = _VectorTraits<_TW>>
1032  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
1033  __make_padding_nonzero(_TW __x)
1034  {
1035  using _Tp = typename _TVT::value_type;
1036  if constexpr (!_S_is_partial<_Tp>)
1037  return __x;
1038  else
1039  {
1040  _GLIBCXX_SIMD_USE_CONSTEXPR auto __implicit_mask
1041  = __vector_bitcast<_Tp>(_S_implicit_mask<_Tp>());
1042  if constexpr (is_integral_v<_Tp>)
1043  return __or(__x, ~__implicit_mask);
1044  else
1045  {
1046  _GLIBCXX_SIMD_USE_CONSTEXPR auto __one
1047  = __andnot(__implicit_mask,
1048  __vector_broadcast<_S_full_size<_Tp>>(_Tp(1)));
1049  // it's not enough to return `x | 1_in_padding` because the
1050  // padding in x might be inf or nan (independent of
1051  // __FINITE_MATH_ONLY__, because it's about padding bits)
1052  return __or(__and(__x, __implicit_mask), __one);
1053  }
1054  }
1055  }
1056  // }}}
1057  };
1058 
1059 // }}}
1060 // simd_abi::_VecBltnBtmsk {{{
1061 template <int _UsedBytes>
1062  struct simd_abi::_VecBltnBtmsk
1063  {
1064  template <typename _Tp>
1065  static constexpr size_t _S_size = _UsedBytes / sizeof(_Tp);
1066 
1067  // validity traits {{{
1068  struct _IsValidAbiTag : __bool_constant<(_UsedBytes > 1)> {};
1069 
1070  template <typename _Tp>
1071  struct _IsValidSizeFor
1072  : __bool_constant<(_UsedBytes / sizeof(_Tp) > 1
1073  && _UsedBytes % sizeof(_Tp) == 0 && _UsedBytes <= 64
1074  && (_UsedBytes > 32 || __have_avx512vl))> {};
1075 
 1076  // Bitmasks require at least AVX512F. If sizeof(_Tp) < 4, AVX512BW is also
 1077  // required.
1078  template <typename _Tp>
1079  struct _IsValid
1080  : conjunction<
1081  _IsValidAbiTag, __bool_constant<__have_avx512f>,
1082  __bool_constant<__have_avx512bw || (sizeof(_Tp) >= 4)>,
1083  __bool_constant<(__vectorized_sizeof<_Tp>() > sizeof(_Tp))>,
1084  _IsValidSizeFor<_Tp>> {};
1085 
1086  template <typename _Tp>
1087  static constexpr bool _S_is_valid_v = _IsValid<_Tp>::value;
1088 
1089  // }}}
1090  // simd/_MaskImpl {{{
1091  #if _GLIBCXX_SIMD_X86INTRIN
1092  using _CommonImpl = _CommonImplX86;
1093  using _SimdImpl = _SimdImplX86<_VecBltnBtmsk<_UsedBytes>>;
1094  using _MaskImpl = _MaskImplX86<_VecBltnBtmsk<_UsedBytes>>;
1095  #else
1096  template <int>
1097  struct _MissingImpl;
1098 
1099  using _CommonImpl = _MissingImpl<_UsedBytes>;
1100  using _SimdImpl = _MissingImpl<_UsedBytes>;
1101  using _MaskImpl = _MissingImpl<_UsedBytes>;
1102  #endif
1103 
1104  // }}}
1105  // __traits {{{
1106  template <typename _Tp>
1107  using _MaskMember = _SimdWrapper<bool, _S_size<_Tp>>;
1108 
1109  template <typename _Tp>
1110  using __traits = conditional_t<
1111  _S_is_valid_v<_Tp>,
1112  _GnuTraits<_Tp, bool, _VecBltnBtmsk<_UsedBytes>, _S_size<_Tp>>,
1113  _InvalidTraits>;
1114 
1115  //}}}
1116  // size metadata {{{
1117  template <typename _Tp>
1118  static constexpr size_t _S_full_size = __traits<_Tp>::_S_full_size;
1119  template <typename _Tp>
1120  static constexpr bool _S_is_partial = __traits<_Tp>::_S_is_partial;
1121 
1122  // }}}
1123  // implicit mask {{{
1124  private:
1125  template <typename _Tp>
1126  using _ImplicitMask = _SimdWrapper<bool, _S_size<_Tp>>;
1127 
1128  public:
1129  template <size_t _Np>
1130  _GLIBCXX_SIMD_INTRINSIC static constexpr __bool_storage_member_type_t<_Np>
1131  __implicit_mask_n()
1132  {
1133  using _Tp = __bool_storage_member_type_t<_Np>;
1134  return _Np < sizeof(_Tp) * __CHAR_BIT__ ? _Tp((1ULL << _Np) - 1) : ~_Tp();
1135  }
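// Illustrative note (not part of the upstream sources): for _Np == 5 the
// mask is _Tp((1ULL << 5) - 1) == 0b11111 because 5 is smaller than the
// bit width of the storage type; when _Np fills the storage type
// completely, ~_Tp() sets all bits instead.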
1136 
1137  template <typename _Tp>
1138  _GLIBCXX_SIMD_INTRINSIC static constexpr _ImplicitMask<_Tp>
1139  _S_implicit_mask()
1140  { return __implicit_mask_n<_S_size<_Tp>>(); }
1141 
1142  template <typename _Tp>
1143  _GLIBCXX_SIMD_INTRINSIC static constexpr __bool_storage_member_type_t<
1144  _S_size<_Tp>>
1145  _S_implicit_mask_intrin()
1146  { return __implicit_mask_n<_S_size<_Tp>>(); }
1147 
1148  template <typename _Tp, size_t _Np>
1149  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1150  _S_masked(_SimdWrapper<_Tp, _Np> __x)
1151  {
1152  if constexpr (is_same_v<_Tp, bool>)
1153  if constexpr (_Np < 8 || (_Np & (_Np - 1)) != 0)
1154  return _MaskImpl::_S_bit_and(
1155  __x, _SimdWrapper<_Tp, _Np>(
1156  __bool_storage_member_type_t<_Np>((1ULL << _Np) - 1)));
1157  else
1158  return __x;
1159  else
1160  return _S_masked(__x._M_data);
1161  }
1162 
1163  template <typename _TV>
1164  _GLIBCXX_SIMD_INTRINSIC static constexpr _TV
1165  _S_masked(_TV __x)
1166  {
1167  using _Tp = typename _VectorTraits<_TV>::value_type;
1168  static_assert(
1169  !__is_bitmask_v<_TV>,
1170  "_VecBltnBtmsk::_S_masked cannot work on bitmasks, since it doesn't "
1171  "know the number of elements. Use _SimdWrapper<bool, N> instead.");
1172  if constexpr (_S_is_partial<_Tp>)
1173  {
1174  constexpr size_t _Np = _S_size<_Tp>;
1175  return __make_dependent_t<_TV, _CommonImpl>::_S_blend(
1176  _S_implicit_mask<_Tp>(), _SimdWrapper<_Tp, _Np>(),
1177  _SimdWrapper<_Tp, _Np>(__x));
1178  }
1179  else
1180  return __x;
1181  }
1182 
1183  template <typename _TV, typename _TVT = _VectorTraits<_TV>>
1184  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
1185  __make_padding_nonzero(_TV __x)
1186  {
1187  using _Tp = typename _TVT::value_type;
1188  if constexpr (!_S_is_partial<_Tp>)
1189  return __x;
1190  else
1191  {
1192  constexpr size_t _Np = _S_size<_Tp>;
1193  if constexpr (is_integral_v<typename _TVT::value_type>)
1194  return __x
1195  | __generate_vector<_Tp, _S_full_size<_Tp>>(
1196  [](auto __i) -> _Tp {
1197  if (__i < _Np)
1198  return 0;
1199  else
1200  return 1;
1201  });
1202  else
1203  return __make_dependent_t<_TV, _CommonImpl>::_S_blend(
1204  _S_implicit_mask<_Tp>(),
1205  _SimdWrapper<_Tp, _Np>(
1206  __vector_broadcast<_S_full_size<_Tp>>(_Tp(1))),
1207  _SimdWrapper<_Tp, _Np>(__x))
1208  ._M_data;
1209  }
1210  }
1211 
1212  // }}}
1213  };
1214 
1215 //}}}
1216 // _CommonImplBuiltin {{{
1217 struct _CommonImplBuiltin
1218 {
1219  // _S_converts_via_decomposition{{{
1220  // This lists all cases where a __vector_convert needs to fall back to
1221  // conversion of individual scalars (i.e. decompose the input vector into
1222  // scalars, convert, compose output vector). In those cases, _S_masked_load &
1223  // _S_masked_store prefer to use the _S_bit_iteration implementation.
1224  template <typename _From, typename _To, size_t _ToSize>
1225  static inline constexpr bool __converts_via_decomposition_v
1226  = sizeof(_From) != sizeof(_To);
1227 
1228  // }}}
1229  // _S_load{{{
1230  template <typename _Tp, size_t _Np, size_t _Bytes = _Np * sizeof(_Tp)>
1231  _GLIBCXX_SIMD_INTRINSIC static __vector_type_t<_Tp, _Np>
1232  _S_load(const void* __p)
1233  {
1234  static_assert(_Np > 1);
1235  static_assert(_Bytes % sizeof(_Tp) == 0);
1236  using _Rp = __vector_type_t<_Tp, _Np>;
1237  if constexpr (sizeof(_Rp) == _Bytes)
1238  {
1239  _Rp __r;
1240  __builtin_memcpy(&__r, __p, _Bytes);
1241  return __r;
1242  }
1243  else
1244  {
1245 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
1246  using _Up = conditional_t<
1247  is_integral_v<_Tp>,
1248  conditional_t<_Bytes % 4 == 0,
1249  conditional_t<_Bytes % 8 == 0, long long, int>,
1250  conditional_t<_Bytes % 2 == 0, short, signed char>>,
1251  conditional_t<(_Bytes < 8 || _Np % 2 == 1 || _Np == 2), _Tp,
1252  double>>;
1253  using _V = __vector_type_t<_Up, _Np * sizeof(_Tp) / sizeof(_Up)>;
1254  if constexpr (sizeof(_V) != sizeof(_Rp))
1255  { // on i386 with 4 < _Bytes <= 8
1256  _Rp __r{};
1257  __builtin_memcpy(&__r, __p, _Bytes);
1258  return __r;
1259  }
1260  else
1261 #else // _GLIBCXX_SIMD_WORKAROUND_PR90424
1262  using _V = _Rp;
1263 #endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
1264  {
1265  _V __r{};
1266  static_assert(_Bytes <= sizeof(_V));
1267  __builtin_memcpy(&__r, __p, _Bytes);
1268  return reinterpret_cast<_Rp>(__r);
1269  }
1270  }
1271  }
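// Illustrative note (not part of the upstream sources): a partial load such
// as _S_load<int, 4, 12>(__p) (hypothetical __p) memcpys only 12 bytes into
// a zero-initialized 16-byte vector, so reading three ints never touches
// memory past the end of the source array.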
1272 
1273  // }}}
1274  // _S_store {{{
1275  template <size_t _ReqBytes = 0, typename _TV>
1276  _GLIBCXX_SIMD_INTRINSIC static void _S_store(_TV __x, void* __addr)
1277  {
1278  constexpr size_t _Bytes = _ReqBytes == 0 ? sizeof(__x) : _ReqBytes;
1279  static_assert(sizeof(__x) >= _Bytes);
1280 
1281  if constexpr (__is_vector_type_v<_TV>)
1282  {
1283  using _Tp = typename _VectorTraits<_TV>::value_type;
1284  constexpr size_t _Np = _Bytes / sizeof(_Tp);
1285  static_assert(_Np * sizeof(_Tp) == _Bytes);
1286 
1287 #ifdef _GLIBCXX_SIMD_WORKAROUND_PR90424
1288  using _Up = conditional_t<
1289  (is_integral_v<_Tp> || _Bytes < 4),
1290  conditional_t<(sizeof(__x) > sizeof(long long)), long long, _Tp>,
1291  float>;
1292  const auto __v = __vector_bitcast<_Up>(__x);
1293 #else // _GLIBCXX_SIMD_WORKAROUND_PR90424
1294  const __vector_type_t<_Tp, _Np> __v = __x;
1295 #endif // _GLIBCXX_SIMD_WORKAROUND_PR90424
1296 
1297  if constexpr ((_Bytes & (_Bytes - 1)) != 0)
1298  {
1299  constexpr size_t _MoreBytes = std::__bit_ceil(_Bytes);
1300  alignas(decltype(__v)) char __tmp[_MoreBytes];
1301  __builtin_memcpy(__tmp, &__v, _MoreBytes);
1302  __builtin_memcpy(__addr, __tmp, _Bytes);
1303  }
1304  else
1305  __builtin_memcpy(__addr, &__v, _Bytes);
1306  }
1307  else
1308  __builtin_memcpy(__addr, &__x, _Bytes);
1309  }
1310 
1311  template <typename _Tp, size_t _Np>
1312  _GLIBCXX_SIMD_INTRINSIC static void _S_store(_SimdWrapper<_Tp, _Np> __x,
1313  void* __addr)
1314  { _S_store<_Np * sizeof(_Tp)>(__x._M_data, __addr); }
1315 
1316  // }}}
1317  // _S_store_bool_array(_BitMask) {{{
1318  template <size_t _Np, bool _Sanitized>
1319  _GLIBCXX_SIMD_INTRINSIC static constexpr void
1320  _S_store_bool_array(_BitMask<_Np, _Sanitized> __x, bool* __mem)
1321  {
1322  if constexpr (_Np == 1)
1323  __mem[0] = __x[0];
1324  else if constexpr (_Np == 2)
1325  {
1326  short __bool2 = (__x._M_to_bits() * 0x81) & 0x0101;
1327  _S_store<_Np>(__bool2, __mem);
1328  }
1329  else if constexpr (_Np == 3)
1330  {
1331  int __bool3 = (__x._M_to_bits() * 0x4081) & 0x010101;
1332  _S_store<_Np>(__bool3, __mem);
1333  }
1334  else
1335  {
1336  __execute_n_times<__div_roundup(_Np, 4)>([&](auto __i) {
1337  constexpr int __offset = __i * 4;
1338  constexpr int __remaining = _Np - __offset;
1339  if constexpr (__remaining > 4 && __remaining <= 7)
1340  {
1341  const _ULLong __bool7
1342  = (__x.template _M_extract<__offset>()._M_to_bits()
1343  * 0x40810204081ULL)
1344  & 0x0101010101010101ULL;
1345  _S_store<__remaining>(__bool7, __mem + __offset);
1346  }
1347  else if constexpr (__remaining >= 4)
1348  {
1349  int __bits = __x.template _M_extract<__offset>()._M_to_bits();
1350  if constexpr (__remaining > 7)
1351  __bits &= 0xf;
1352  const int __bool4 = (__bits * 0x204081) & 0x01010101;
1353  _S_store<4>(__bool4, __mem + __offset);
1354  }
1355  });
1356  }
1357  }
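// Illustrative note (not part of the upstream sources): the multiply-and-
// mask trick above spreads mask bits into bytes. For _Np == 2 with bits
// b1:b0, (bits * 0x81) places b0 at bit 0 and b1 at bit 8; masking with
// 0x0101 leaves byte 0 == b0 and byte 1 == b1, i.e. the two bool values
// ready to be stored.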
1358 
1359  // }}}
1360  // _S_blend{{{
1361  template <typename _Tp, size_t _Np>
1362  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
1363  _S_blend(_SimdWrapper<__int_for_sizeof_t<_Tp>, _Np> __k,
1364  _SimdWrapper<_Tp, _Np> __at0, _SimdWrapper<_Tp, _Np> __at1)
1365  { return __k._M_data ? __at1._M_data : __at0._M_data; }
1366 
1367  // }}}
1368 };
1369 
1370 // }}}
1371 // _SimdImplBuiltin {{{1
1372 template <typename _Abi>
1373  struct _SimdImplBuiltin
1374  {
1375  // member types {{{2
1376  template <typename _Tp>
1377  static constexpr size_t _S_max_store_size = 16;
1378 
1379  using abi_type = _Abi;
1380 
1381  template <typename _Tp>
1382  using _TypeTag = _Tp*;
1383 
1384  template <typename _Tp>
1385  using _SimdMember = typename _Abi::template __traits<_Tp>::_SimdMember;
1386 
1387  template <typename _Tp>
1388  using _MaskMember = typename _Abi::template _MaskMember<_Tp>;
1389 
1390  template <typename _Tp>
1391  static constexpr size_t _S_size = _Abi::template _S_size<_Tp>;
1392 
1393  template <typename _Tp>
1394  static constexpr size_t _S_full_size = _Abi::template _S_full_size<_Tp>;
1395 
1396  using _CommonImpl = typename _Abi::_CommonImpl;
1397  using _SuperImpl = typename _Abi::_SimdImpl;
1398  using _MaskImpl = typename _Abi::_MaskImpl;
1399 
1400  // _M_make_simd(_SimdWrapper/__intrinsic_type_t) {{{2
1401  template <typename _Tp, size_t _Np>
1402  _GLIBCXX_SIMD_INTRINSIC static simd<_Tp, _Abi>
1403  _M_make_simd(_SimdWrapper<_Tp, _Np> __x)
1404  { return {__private_init, __x}; }
1405 
1406  template <typename _Tp, size_t _Np>
1407  _GLIBCXX_SIMD_INTRINSIC static simd<_Tp, _Abi>
1408  _M_make_simd(__intrinsic_type_t<_Tp, _Np> __x)
1409  { return {__private_init, __vector_bitcast<_Tp>(__x)}; }
1410 
1411  // _S_broadcast {{{2
1412  template <typename _Tp>
1413  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdMember<_Tp>
1414  _S_broadcast(_Tp __x) noexcept
1415  { return __vector_broadcast<_S_full_size<_Tp>>(__x); }
1416 
1417  // _S_generator {{{2
1418  template <typename _Fp, typename _Tp>
1419  inline static constexpr _SimdMember<_Tp> _S_generator(_Fp&& __gen,
1420  _TypeTag<_Tp>)
1421  {
1422  return __generate_vector<_Tp, _S_full_size<_Tp>>([&](
1423  auto __i) constexpr {
1424  if constexpr (__i < _S_size<_Tp>)
1425  return __gen(__i);
1426  else
1427  return 0;
1428  });
1429  }
1430 
1431  // _S_load {{{2
1432  template <typename _Tp, typename _Up>
1433  _GLIBCXX_SIMD_INTRINSIC static _SimdMember<_Tp>
1434  _S_load(const _Up* __mem, _TypeTag<_Tp>) noexcept
1435  {
1436  constexpr size_t _Np = _S_size<_Tp>;
1437  constexpr size_t __max_load_size
1438  = (sizeof(_Up) >= 4 && __have_avx512f) || __have_avx512bw ? 64
1439  : (is_floating_point_v<_Up> && __have_avx) || __have_avx2 ? 32
1440  : 16;
1441  constexpr size_t __bytes_to_load = sizeof(_Up) * _Np;
1442  if constexpr (sizeof(_Up) > 8)
1443  return __generate_vector<_Tp, _SimdMember<_Tp>::_S_full_size>([&](
1444  auto __i) constexpr {
1445  return static_cast<_Tp>(__i < _Np ? __mem[__i] : 0);
1446  });
1447  else if constexpr (is_same_v<_Up, _Tp>)
1448  return _CommonImpl::template _S_load<_Tp, _S_full_size<_Tp>,
1449  _Np * sizeof(_Tp)>(__mem);
1450  else if constexpr (__bytes_to_load <= __max_load_size)
1451  return __convert<_SimdMember<_Tp>>(
1452  _CommonImpl::template _S_load<_Up, _Np>(__mem));
1453  else if constexpr (__bytes_to_load % __max_load_size == 0)
1454  {
1455  constexpr size_t __n_loads = __bytes_to_load / __max_load_size;
1456  constexpr size_t __elements_per_load = _Np / __n_loads;
1457  return __call_with_n_evaluations<__n_loads>(
1458  [](auto... __uncvted) {
1459  return __convert<_SimdMember<_Tp>>(__uncvted...);
1460  },
1461  [&](auto __i) {
1462  return _CommonImpl::template _S_load<_Up, __elements_per_load>(
1463  __mem + __i * __elements_per_load);
1464  });
1465  }
1466  else if constexpr (__bytes_to_load % (__max_load_size / 2) == 0
1467  && __max_load_size > 16)
1468  { // e.g. int[] -> <char, 12> with AVX2
1469  constexpr size_t __n_loads
1470  = __bytes_to_load / (__max_load_size / 2);
1471  constexpr size_t __elements_per_load = _Np / __n_loads;
1472  return __call_with_n_evaluations<__n_loads>(
1473  [](auto... __uncvted) {
1474  return __convert<_SimdMember<_Tp>>(__uncvted...);
1475  },
1476  [&](auto __i) {
1477  return _CommonImpl::template _S_load<_Up, __elements_per_load>(
1478  __mem + __i * __elements_per_load);
1479  });
1480  }
1481  else // e.g. int[] -> <char, 9>
1482  return __call_with_subscripts(
1483  __mem, make_index_sequence<_Np>(), [](auto... __args) {
1484  return __vector_type_t<_Tp, _S_full_size<_Tp>>{
1485  static_cast<_Tp>(__args)...};
1486  });
1487  }
1488 
1489  // _S_masked_load {{{2
1490  template <typename _Tp, size_t _Np, typename _Up>
1491  static inline _SimdWrapper<_Tp, _Np>
1492  _S_masked_load(_SimdWrapper<_Tp, _Np> __merge, _MaskMember<_Tp> __k,
1493  const _Up* __mem) noexcept
1494  {
1495  _BitOps::_S_bit_iteration(_MaskImpl::_S_to_bits(__k), [&](auto __i) {
1496  __merge._M_set(__i, static_cast<_Tp>(__mem[__i]));
1497  });
1498  return __merge;
1499  }
1500 
1501  // _S_store {{{2
1502  template <typename _Tp, typename _Up>
1503  _GLIBCXX_SIMD_INTRINSIC static void
1504  _S_store(_SimdMember<_Tp> __v, _Up* __mem, _TypeTag<_Tp>) noexcept
1505  {
1506  // TODO: converting int -> "smaller int" can be optimized with AVX512
1507  constexpr size_t _Np = _S_size<_Tp>;
1508  constexpr size_t __max_store_size
1509  = _SuperImpl::template _S_max_store_size<_Up>;
1510  if constexpr (sizeof(_Up) > 8)
1511  __execute_n_times<_Np>([&](auto __i) constexpr {
1512  __mem[__i] = __v[__i];
1513  });
1514  else if constexpr (is_same_v<_Up, _Tp>)
1515  _CommonImpl::_S_store(__v, __mem);
1516  else if constexpr (sizeof(_Up) * _Np <= __max_store_size)
1517  _CommonImpl::_S_store(_SimdWrapper<_Up, _Np>(__convert<_Up>(__v)),
1518  __mem);
1519  else
1520  {
1521  constexpr size_t __vsize = __max_store_size / sizeof(_Up);
1522  // round up to convert the last partial vector as well:
1523  constexpr size_t __stores = __div_roundup(_Np, __vsize);
1524  constexpr size_t __full_stores = _Np / __vsize;
1525  using _V = __vector_type_t<_Up, __vsize>;
1526  const array<_V, __stores> __converted
1527  = __convert_all<_V, __stores>(__v);
1528  __execute_n_times<__full_stores>([&](auto __i) constexpr {
1529  _CommonImpl::_S_store(__converted[__i], __mem + __i * __vsize);
1530  });
1531  if constexpr (__full_stores < __stores)
1532  _CommonImpl::template _S_store<(_Np - __full_stores * __vsize)
1533  * sizeof(_Up)>(
1534  __converted[__full_stores], __mem + __full_stores * __vsize);
1535  }
1536  }
1537 
1538  // _S_masked_store_nocvt {{{2
1539  template <typename _Tp, size_t _Np>
1540  _GLIBCXX_SIMD_INTRINSIC static void
1541  _S_masked_store_nocvt(_SimdWrapper<_Tp, _Np> __v, _Tp* __mem,
1542  _MaskMember<_Tp> __k)
1543  {
1544  _BitOps::_S_bit_iteration(
1545  _MaskImpl::_S_to_bits(__k), [&](auto __i) constexpr {
1546  __mem[__i] = __v[__i];
1547  });
1548  }
1549 
1550  // _S_masked_store {{{2
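  // The converting masked store below picks one of three strategies: when
  // the value representation does not change (same type, or integers of
  // equal width) the data and mask are merely reinterpreted; when _Up can be
  // handled in vector registers and the conversion does not decompose, the
  // data is converted and stored in chunks of at most __max_store_size
  // bytes, with the mask converted per chunk; otherwise it falls back to a
  // per-element loop over the set mask bits.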
1551  template <typename _TW, typename _TVT = _VectorTraits<_TW>,
1552  typename _Tp = typename _TVT::value_type, typename _Up>
1553  static inline void
1554  _S_masked_store(const _TW __v, _Up* __mem, const _MaskMember<_Tp> __k)
1555  noexcept
1556  {
1557  constexpr size_t _TV_size = _S_size<_Tp>;
1558  [[maybe_unused]] const auto __vi = __to_intrin(__v);
1559  constexpr size_t __max_store_size
1560  = _SuperImpl::template _S_max_store_size<_Up>;
1563 1561  if constexpr (is_same_v<_Tp, _Up>
1564 1562      || (is_integral_v<_Tp>
1565 1563      && is_integral_v<_Up>
1566 1564      && sizeof(_Tp) == sizeof(_Up)))
1565  {
1568 1566  // no value conversion needed (same type, or integers of equal width); reinterpret:
1567  const _MaskMember<_Up> __kk = [&]() {
1568  if constexpr (__is_bitmask_v<decltype(__k)>)
1569  return _MaskMember<_Up>(__k._M_data);
1570  else
1571  return __wrapper_bitcast<__int_for_sizeof_t<_Up>>(__k);
1572  }();
1573  _SuperImpl::_S_masked_store_nocvt(__wrapper_bitcast<_Up>(__v),
1574  __mem, __kk);
1575  }
1576  else if constexpr (__vectorized_sizeof<_Up>() > sizeof(_Up)
1577  && !_CommonImpl::
1578  template __converts_via_decomposition_v<
1579  _Tp, _Up, __max_store_size>)
1580  { // conversion via decomposition is better handled via the
1581  // bit_iteration
1582  // fallback below
1583  constexpr size_t _UW_size
1584  = std::min(_TV_size, __max_store_size / sizeof(_Up));
1585  static_assert(_UW_size <= _TV_size);
1586  using _UW = _SimdWrapper<_Up, _UW_size>;
1587  using _UV = __vector_type_t<_Up, _UW_size>;
1588  using _UAbi = simd_abi::deduce_t<_Up, _UW_size>;
1589  if constexpr (_UW_size == _TV_size) // one convert+store
1590  {
1591  const _UW __converted = __convert<_UW>(__v);
1592  _SuperImpl::_S_masked_store_nocvt(
1593  __converted, __mem,
1594  _UAbi::_MaskImpl::template _S_convert<
1595  __int_for_sizeof_t<_Up>>(__k));
1596  }
1597  else
1598  {
1599  static_assert(_UW_size * sizeof(_Up) == __max_store_size);
1600  constexpr size_t _NFullStores = _TV_size / _UW_size;
1601  constexpr size_t _NAllStores
1602  = __div_roundup(_TV_size, _UW_size);
1603  constexpr size_t _NParts = _S_full_size<_Tp> / _UW_size;
1604  const array<_UV, _NAllStores> __converted
1605  = __convert_all<_UV, _NAllStores>(__v);
1606  __execute_n_times<_NFullStores>([&](auto __i) {
1607  _SuperImpl::_S_masked_store_nocvt(
1608  _UW(__converted[__i]), __mem + __i * _UW_size,
1609  _UAbi::_MaskImpl::template _S_convert<
1610  __int_for_sizeof_t<_Up>>(
1611  __extract_part<__i, _NParts>(__k.__as_full_vector())));
1612  });
1613  if constexpr (_NAllStores
1614  > _NFullStores) // one partial at the end
1615  _SuperImpl::_S_masked_store_nocvt(
1616  _UW(__converted[_NFullStores]),
1617  __mem + _NFullStores * _UW_size,
1618  _UAbi::_MaskImpl::template _S_convert<
1619  __int_for_sizeof_t<_Up>>(
1620  __extract_part<_NFullStores, _NParts>(
1621  __k.__as_full_vector())));
1622  }
1623  }
1624  else
1625  _BitOps::_S_bit_iteration(
1626  _MaskImpl::_S_to_bits(__k), [&](auto __i) constexpr {
1627  __mem[__i] = static_cast<_Up>(__v[__i]);
1628  });
1629  }
1630 
1631  // _S_complement {{{2
1632  template <typename _Tp, size_t _Np>
1633  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1634  _S_complement(_SimdWrapper<_Tp, _Np> __x) noexcept
1635  {
1636  if constexpr (is_floating_point_v<_Tp>)
1637  return __vector_bitcast<_Tp>(~__vector_bitcast<__int_for_sizeof_t<_Tp>>(__x));
1638  else
1639  return ~__x._M_data;
1640  }
1641 
1642  // _S_unary_minus {{{2
1643  template <typename _Tp, size_t _Np>
1644  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1645  _S_unary_minus(_SimdWrapper<_Tp, _Np> __x) noexcept
1646  {
1647  // GCC doesn't use the psign instructions, but pxor & psub seem to be
1648  // just as good a choice as pcmpeqd & psign. So meh.
1649  return -__x._M_data;
1650  }
1651 
1652  // arithmetic operators {{{2
1653  template <typename _Tp, size_t _Np>
1654  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1655  _S_plus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1656  { return __x._M_data + __y._M_data; }
1657 
1658  template <typename _Tp, size_t _Np>
1659  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1660  _S_minus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1661  { return __x._M_data - __y._M_data; }
1662 
1663  template <typename _Tp, size_t _Np>
1664  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1665  _S_multiplies(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1666  { return __x._M_data * __y._M_data; }
1667 
1668  template <typename _Tp, size_t _Np>
1669  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1670  _S_divides(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1671  {
1674 1672  // Division by 0 is always UB, so we must ensure the padding elements of
1675 1673  // partial registers are never 0.
1674  if constexpr (!_Abi::template _S_is_partial<_Tp>)
1675  return __x._M_data / __y._M_data;
1676  else
1677  return __x._M_data / _Abi::__make_padding_nonzero(__y._M_data);
1678  }
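  // Illustration of the padding fix-up above (hypothetical values): for
  // simd<int, 3> stored in a 4-lane register, lane 3 of __y is
  // value-initialized to 0, so dividing the full register would be UB.
  // __make_padding_nonzero replaces only the padding lanes with a nonzero
  // value; the results in lanes 0..2 are unaffected.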
1679 
1680  template <typename _Tp, size_t _Np>
1681  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1682  _S_modulus(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1683  {
1684  if constexpr (!_Abi::template _S_is_partial<_Tp>)
1685  return __x._M_data % __y._M_data;
1686  else
1687  return __as_vector(__x)
1688  % _Abi::__make_padding_nonzero(__as_vector(__y));
1689  }
1690 
1691  template <typename _Tp, size_t _Np>
1692  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1693  _S_bit_and(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1694  { return __and(__x, __y); }
1695 
1696  template <typename _Tp, size_t _Np>
1697  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1698  _S_bit_or(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1699  { return __or(__x, __y); }
1700 
1701  template <typename _Tp, size_t _Np>
1702  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1703  _S_bit_xor(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1704  { return __xor(__x, __y); }
1705 
1706  template <typename _Tp, size_t _Np>
1707  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
1708  _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1709  { return __x._M_data << __y._M_data; }
1710 
1711  template <typename _Tp, size_t _Np>
1712  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
1713  _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1714  { return __x._M_data >> __y._M_data; }
1715 
1716  template <typename _Tp, size_t _Np>
1717  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1718  _S_bit_shift_left(_SimdWrapper<_Tp, _Np> __x, int __y)
1719  { return __x._M_data << __y; }
1720 
1721  template <typename _Tp, size_t _Np>
1722  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
1723  _S_bit_shift_right(_SimdWrapper<_Tp, _Np> __x, int __y)
1724  { return __x._M_data >> __y; }
1725 
1726  // compares {{{2
1727  // _S_equal_to {{{3
1728  template <typename _Tp, size_t _Np>
1729  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1730  _S_equal_to(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1731  { return __x._M_data == __y._M_data; }
1732 
1733  // _S_not_equal_to {{{3
1734  template <typename _Tp, size_t _Np>
1735  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1736  _S_not_equal_to(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1737  { return __x._M_data != __y._M_data; }
1738 
1739  // _S_less {{{3
1740  template <typename _Tp, size_t _Np>
1741  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1742  _S_less(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1743  { return __x._M_data < __y._M_data; }
1744 
1745  // _S_less_equal {{{3
1746  template <typename _Tp, size_t _Np>
1747  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1748  _S_less_equal(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
1749  { return __x._M_data <= __y._M_data; }
1750 
1751  // _S_negate {{{2
1752  template <typename _Tp, size_t _Np>
1753  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
1754  _S_negate(_SimdWrapper<_Tp, _Np> __x) noexcept
1755  { return !__x._M_data; }
1756 
1757  // _S_min, _S_max, _S_minmax {{{2
1758  template <typename _Tp, size_t _Np>
1759  _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
1760  _SimdWrapper<_Tp, _Np>
1761  _S_min(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b)
1762  { return __a._M_data < __b._M_data ? __a._M_data : __b._M_data; }
1763 
1764  template <typename _Tp, size_t _Np>
1765  _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
1766  _SimdWrapper<_Tp, _Np>
1767  _S_max(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b)
1768  { return __a._M_data > __b._M_data ? __a._M_data : __b._M_data; }
1769 
1770  template <typename _Tp, size_t _Np>
1771  _GLIBCXX_SIMD_NORMAL_MATH _GLIBCXX_SIMD_INTRINSIC static constexpr
1772  pair<_SimdWrapper<_Tp, _Np>, _SimdWrapper<_Tp, _Np>>
1773  _S_minmax(_SimdWrapper<_Tp, _Np> __a, _SimdWrapper<_Tp, _Np> __b)
1774  {
1775  return {__a._M_data < __b._M_data ? __a._M_data : __b._M_data,
1776  __a._M_data < __b._M_data ? __b._M_data : __a._M_data};
1777  }
1778 
1779  // reductions {{{2
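  // Rough outline of the reductions below: combine the upper and lower half
  // of the register with __binary_op and recurse on the half-sized result
  // until a single element remains, e.g. for plus<>
  //   [a0 a1 a2 a3 a4 a5 a6 a7] -> [a0+a4 a1+a5 a2+a6 a3+a7] -> ... -> sum.
  // Partial registers are first mapped onto a full register: by zeroing the
  // padding for plus<>, by padding with 1 for multiplies<>, or by splitting
  // off the trailing element(s) for other operations.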
1780  template <size_t _Np, size_t... _Is, size_t... _Zeros, typename _Tp,
1781  typename _BinaryOperation>
1782  _GLIBCXX_SIMD_INTRINSIC static _Tp
1783  _S_reduce_partial(index_sequence<_Is...>, index_sequence<_Zeros...>,
1784  simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
1785  {
1786  using _V = __vector_type_t<_Tp, _Np / 2>;
1787  static_assert(sizeof(_V) <= sizeof(__x));
1788  // _S_full_size is the size of the smallest native SIMD register that
1789  // can store _Np/2 elements:
1790  using _FullSimd = __deduced_simd<_Tp, _VectorTraits<_V>::_S_full_size>;
1791  using _HalfSimd = __deduced_simd<_Tp, _Np / 2>;
1792  const auto __xx = __as_vector(__x);
1793  return _HalfSimd::abi_type::_SimdImpl::_S_reduce(
1794  static_cast<_HalfSimd>(__as_vector(__binary_op(
1795  static_cast<_FullSimd>(__intrin_bitcast<_V>(__xx)),
1796  static_cast<_FullSimd>(__intrin_bitcast<_V>(
1797  __vector_permute<(_Np / 2 + _Is)..., (int(_Zeros * 0) - 1)...>(
1798  __xx)))))),
1799  __binary_op);
1800  }
1801 
1802  template <typename _Tp, typename _BinaryOperation>
1803  _GLIBCXX_SIMD_INTRINSIC static constexpr _Tp
1804  _S_reduce(simd<_Tp, _Abi> __x, _BinaryOperation&& __binary_op)
1805  {
1806  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
1807  if constexpr (_Np == 1)
1808  return __x[0];
1809  else if constexpr (_Np == 2)
1810  return __binary_op(simd<_Tp, simd_abi::scalar>(__x[0]),
1811  simd<_Tp, simd_abi::scalar>(__x[1]))[0];
1812  else if constexpr (_Abi::template _S_is_partial<_Tp>) //{{{
1813  {
1814  [[maybe_unused]] constexpr auto __full_size
1815  = _Abi::template _S_full_size<_Tp>;
1816  if constexpr (_Np == 3)
1817  return __binary_op(
1818  __binary_op(simd<_Tp, simd_abi::scalar>(__x[0]),
1819  simd<_Tp, simd_abi::scalar>(__x[1])),
1820  simd<_Tp, simd_abi::scalar>(__x[2]))[0];
1821  else if constexpr (is_same_v<__remove_cvref_t<_BinaryOperation>,
1822  plus<>>)
1823  {
1824  using _Ap = simd_abi::deduce_t<_Tp, __full_size>;
1825  return _Ap::_SimdImpl::_S_reduce(
1826  simd<_Tp, _Ap>(__private_init,
1827  _Abi::_S_masked(__as_vector(__x))),
1828  __binary_op);
1829  }
1830  else if constexpr (is_same_v<__remove_cvref_t<_BinaryOperation>,
1831  multiplies<>>)
1832  {
1833  using _Ap = simd_abi::deduce_t<_Tp, __full_size>;
1834  using _TW = _SimdWrapper<_Tp, __full_size>;
1835  _GLIBCXX_SIMD_USE_CONSTEXPR auto __implicit_mask_full
1836  = _Abi::template _S_implicit_mask<_Tp>().__as_full_vector();
1837  _GLIBCXX_SIMD_USE_CONSTEXPR _TW __one
1838  = __vector_broadcast<__full_size>(_Tp(1));
1839  const _TW __x_full = __data(__x).__as_full_vector();
1840  const _TW __x_padded_with_ones
1841  = _Ap::_CommonImpl::_S_blend(__implicit_mask_full, __one,
1842  __x_full);
1843  return _Ap::_SimdImpl::_S_reduce(
1844  simd<_Tp, _Ap>(__private_init, __x_padded_with_ones),
1845  __binary_op);
1846  }
1847  else if constexpr (_Np & 1)
1848  {
1849  using _Ap = simd_abi::deduce_t<_Tp, _Np - 1>;
1850  return __binary_op(
1851  simd<_Tp, simd_abi::scalar>(_Ap::_SimdImpl::_S_reduce(
1852  simd<_Tp, _Ap>(
1853  __intrin_bitcast<__vector_type_t<_Tp, _Np - 1>>(
1854  __as_vector(__x))),
1855  __binary_op)),
1856  simd<_Tp, simd_abi::scalar>(__x[_Np - 1]))[0];
1857  }
1858  else
1859  return _S_reduce_partial<_Np>(
1860  make_index_sequence<_Np / 2>(),
1861  make_index_sequence<__full_size - _Np / 2>(), __x, __binary_op);
1862  } //}}}
1863  else if constexpr (sizeof(__x) == 16) //{{{
1864  {
1865  if constexpr (_Np == 16)
1866  {
1867  const auto __y = __data(__x);
1868  __x = __binary_op(
1869  _M_make_simd<_Tp, _Np>(
1870  __vector_permute<0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
1871  7, 7>(__y)),
1872  _M_make_simd<_Tp, _Np>(
1873  __vector_permute<8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
1874  14, 14, 15, 15>(__y)));
1875  }
1876  if constexpr (_Np >= 8)
1877  {
1878  const auto __y = __vector_bitcast<short>(__data(__x));
1879  __x = __binary_op(
1880  _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
1881  __vector_permute<0, 0, 1, 1, 2, 2, 3, 3>(__y))),
1882  _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
1883  __vector_permute<4, 4, 5, 5, 6, 6, 7, 7>(__y))));
1884  }
1885  if constexpr (_Np >= 4)
1886  {
1887  using _Up = conditional_t<is_floating_point_v<_Tp>, float, int>;
1888  const auto __y = __vector_bitcast<_Up>(__data(__x));
1889  __x = __binary_op(__x,
1890  _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
1891  __vector_permute<3, 2, 1, 0>(__y))));
1892  }
1893  using _Up = conditional_t<is_floating_point_v<_Tp>, double, _LLong>;
1894  const auto __y = __vector_bitcast<_Up>(__data(__x));
1895  __x = __binary_op(__x, _M_make_simd<_Tp, _Np>(__vector_bitcast<_Tp>(
1896  __vector_permute<1, 1>(__y))));
1897  return __x[0];
1898  } //}}}
1899  else
1900  {
1901  static_assert(sizeof(__x) > __min_vector_size<_Tp>);
1902  static_assert((_Np & (_Np - 1)) == 0); // _Np must be a power of 2
1903  using _Ap = simd_abi::deduce_t<_Tp, _Np / 2>;
1904  using _V = simd<_Tp, _Ap>;
1905  return _Ap::_SimdImpl::_S_reduce(
1906  __binary_op(_V(__private_init, __extract<0, 2>(__as_vector(__x))),
1907  _V(__private_init,
1908  __extract<1, 2>(__as_vector(__x)))),
1909  static_cast<_BinaryOperation&&>(__binary_op));
1910  }
1911  }
1912 
1913  // math {{{2
1914  // frexp, modf and copysign implemented in simd_math.h
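  // The fallback macros below generate element-wise calls into the scalar
  // math functions; derived ABI-specific implementations can override them
  // with vectorized versions. E.g. _GLIBCXX_SIMD_MATH_FALLBACK(acos)
  // produces (modulo formatting)
  //   template <typename _Tp, typename... _More>
  //     static _Tp _S_acos(const _Tp& __x, const _More&... __more)
  //     { return __generate_vector<_Tp>(
  //         [&](auto __i) { return acos(__x[__i], __more[__i]...); }); }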
1915 #define _GLIBCXX_SIMD_MATH_FALLBACK(__name) \
1916  template <typename _Tp, typename... _More> \
1917  static _Tp _S_##__name(const _Tp& __x, const _More&... __more) \
1918  { \
1919  return __generate_vector<_Tp>( \
1920  [&](auto __i) { return __name(__x[__i], __more[__i]...); }); \
1921  }
1922 
1923 #define _GLIBCXX_SIMD_MATH_FALLBACK_MASKRET(__name) \
1924  template <typename _Tp, typename... _More> \
1925  static typename _Tp::mask_type _S_##__name(const _Tp& __x, \
1926  const _More&... __more) \
1927  { \
1928  return __generate_vector<_Tp>( \
1929  [&](auto __i) { return __name(__x[__i], __more[__i]...); }); \
1930  }
1931 
1932 #define _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(_RetTp, __name) \
1933  template <typename _Tp, typename... _More> \
1934  static auto _S_##__name(const _Tp& __x, const _More&... __more) \
1935  { \
1936  return __fixed_size_storage_t<_RetTp, \
1937  _VectorTraits<_Tp>::_S_partial_width>:: \
1938  _S_generate([&](auto __meta) constexpr { \
1939  return __meta._S_generator( \
1940  [&](auto __i) { \
1941  return __name(__x[__meta._S_offset + __i], \
1942  __more[__meta._S_offset + __i]...); \
1943  }, \
1944  static_cast<_RetTp*>(nullptr)); \
1945  }); \
1946  }
1947 
1948  _GLIBCXX_SIMD_MATH_FALLBACK(acos)
1949  _GLIBCXX_SIMD_MATH_FALLBACK(asin)
1950  _GLIBCXX_SIMD_MATH_FALLBACK(atan)
1951  _GLIBCXX_SIMD_MATH_FALLBACK(atan2)
1952  _GLIBCXX_SIMD_MATH_FALLBACK(cos)
1953  _GLIBCXX_SIMD_MATH_FALLBACK(sin)
1954  _GLIBCXX_SIMD_MATH_FALLBACK(tan)
1955  _GLIBCXX_SIMD_MATH_FALLBACK(acosh)
1956  _GLIBCXX_SIMD_MATH_FALLBACK(asinh)
1957  _GLIBCXX_SIMD_MATH_FALLBACK(atanh)
1958  _GLIBCXX_SIMD_MATH_FALLBACK(cosh)
1959  _GLIBCXX_SIMD_MATH_FALLBACK(sinh)
1960  _GLIBCXX_SIMD_MATH_FALLBACK(tanh)
1961  _GLIBCXX_SIMD_MATH_FALLBACK(exp)
1962  _GLIBCXX_SIMD_MATH_FALLBACK(exp2)
1963  _GLIBCXX_SIMD_MATH_FALLBACK(expm1)
1964  _GLIBCXX_SIMD_MATH_FALLBACK(ldexp)
1965  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(int, ilogb)
1966  _GLIBCXX_SIMD_MATH_FALLBACK(log)
1967  _GLIBCXX_SIMD_MATH_FALLBACK(log10)
1968  _GLIBCXX_SIMD_MATH_FALLBACK(log1p)
1969  _GLIBCXX_SIMD_MATH_FALLBACK(log2)
1970  _GLIBCXX_SIMD_MATH_FALLBACK(logb)
1971 
1972  // modf implemented in simd_math.h
1973  _GLIBCXX_SIMD_MATH_FALLBACK(scalbn)
1974  _GLIBCXX_SIMD_MATH_FALLBACK(scalbln)
1975  _GLIBCXX_SIMD_MATH_FALLBACK(cbrt)
1976  _GLIBCXX_SIMD_MATH_FALLBACK(fabs)
1977  _GLIBCXX_SIMD_MATH_FALLBACK(pow)
1978  _GLIBCXX_SIMD_MATH_FALLBACK(sqrt)
1979  _GLIBCXX_SIMD_MATH_FALLBACK(erf)
1980  _GLIBCXX_SIMD_MATH_FALLBACK(erfc)
1981  _GLIBCXX_SIMD_MATH_FALLBACK(lgamma)
1982  _GLIBCXX_SIMD_MATH_FALLBACK(tgamma)
1983 
1984  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lrint)
1985  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llrint)
1986 
1987  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long, lround)
1988  _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET(long long, llround)
1989 
1990  _GLIBCXX_SIMD_MATH_FALLBACK(fmod)
1991  _GLIBCXX_SIMD_MATH_FALLBACK(remainder)
1992 
1993  template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
1994  static _Tp
1995  _S_remquo(const _Tp __x, const _Tp __y,
1996  __fixed_size_storage_t<int, _TVT::_S_partial_width>* __z)
1997  {
1998  return __generate_vector<_Tp>([&](auto __i) {
1999  int __tmp;
2000  auto __r = remquo(__x[__i], __y[__i], &__tmp);
2001  __z->_M_set(__i, __tmp);
2002  return __r;
2003  });
2004  }
2005 
2006  // copysign in simd_math.h
2007  _GLIBCXX_SIMD_MATH_FALLBACK(nextafter)
2008  _GLIBCXX_SIMD_MATH_FALLBACK(fdim)
2009  _GLIBCXX_SIMD_MATH_FALLBACK(fmax)
2010  _GLIBCXX_SIMD_MATH_FALLBACK(fmin)
2011  _GLIBCXX_SIMD_MATH_FALLBACK(fma)
2012 
2013  template <typename _Tp, size_t _Np>
2014  static constexpr _MaskMember<_Tp>
2015  _S_isgreater(_SimdWrapper<_Tp, _Np> __x,
2016  _SimdWrapper<_Tp, _Np> __y) noexcept
2017  {
2018  using _Ip = __int_for_sizeof_t<_Tp>;
2019  const auto __xn = __vector_bitcast<_Ip>(__x);
2020  const auto __yn = __vector_bitcast<_Ip>(__y);
2021  const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn;
2022  const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn;
2023  return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data,
2024  __xp > __yp);
2025  }
2026 
2027  template <typename _Tp, size_t _Np>
2028  static constexpr _MaskMember<_Tp>
2029  _S_isgreaterequal(_SimdWrapper<_Tp, _Np> __x,
2030  _SimdWrapper<_Tp, _Np> __y) noexcept
2031  {
2032  using _Ip = __int_for_sizeof_t<_Tp>;
2033  const auto __xn = __vector_bitcast<_Ip>(__x);
2034  const auto __yn = __vector_bitcast<_Ip>(__y);
2035  const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn;
2036  const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn;
2037  return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data,
2038  __xp >= __yp);
2039  }
2040 
2041  template <typename _Tp, size_t _Np>
2042  static constexpr _MaskMember<_Tp>
2043  _S_isless(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y) noexcept
2044  {
2045  using _Ip = __int_for_sizeof_t<_Tp>;
2046  const auto __xn = __vector_bitcast<_Ip>(__x);
2047  const auto __yn = __vector_bitcast<_Ip>(__y);
2048  const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn;
2049  const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn;
2050  return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data,
2051  __xp < __yp);
2052  }
2053 
2054  template <typename _Tp, size_t _Np>
2055  static constexpr _MaskMember<_Tp>
2056  _S_islessequal(_SimdWrapper<_Tp, _Np> __x,
2057  _SimdWrapper<_Tp, _Np> __y) noexcept
2058  {
2059  using _Ip = __int_for_sizeof_t<_Tp>;
2060  const auto __xn = __vector_bitcast<_Ip>(__x);
2061  const auto __yn = __vector_bitcast<_Ip>(__y);
2062  const auto __xp = __xn < 0 ? -(__xn & __finite_max_v<_Ip>) : __xn;
2063  const auto __yp = __yn < 0 ? -(__yn & __finite_max_v<_Ip>) : __yn;
2064  return __andnot(_SuperImpl::_S_isunordered(__x, __y)._M_data,
2065  __xp <= __yp);
2066  }
2067 
2068  template <typename _Tp, size_t _Np>
2069  static constexpr _MaskMember<_Tp>
2070  _S_islessgreater(_SimdWrapper<_Tp, _Np> __x,
2071  _SimdWrapper<_Tp, _Np> __y) noexcept
2072  {
2073  return __andnot(_SuperImpl::_S_isunordered(__x, __y),
2074  _SuperImpl::_S_not_equal_to(__x, __y));
2075  }
2076 
2077 #undef _GLIBCXX_SIMD_MATH_FALLBACK
2078 #undef _GLIBCXX_SIMD_MATH_FALLBACK_MASKRET
2079 #undef _GLIBCXX_SIMD_MATH_FALLBACK_FIXEDRET
2080  // _S_abs {{{3
2081  template <typename _Tp, size_t _Np>
2082  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2083  _S_abs(_SimdWrapper<_Tp, _Np> __x) noexcept
2084  {
2085  // if (__builtin_is_constant_evaluated())
2086  // {
2087  // return __x._M_data < 0 ? -__x._M_data : __x._M_data;
2088  // }
2089  if constexpr (is_floating_point_v<_Tp>)
2092 2090  // `v < 0 ? -v : v` cannot compile to the efficient implementation of
2093 2091  // masking off the sign bit, because it must consider v == -0.
2094 2092 
2095 2093  // ~(-0.) & v would be easy, but breaks with -fno-signed-zeros.
2094  return __and(_S_absmask<__vector_type_t<_Tp, _Np>>, __x._M_data);
2095  else
2096  return __x._M_data < 0 ? -__x._M_data : __x._M_data;
2097  }
2098 
2099  // }}}3
2100  // _S_plus_minus {{{
2101  // Returns __x + __y - __y without -fassociative-math optimizing to __x.
2102  // - _TV must be __vector_type_t<floating-point type, N>.
2103  // - _UV must be _TV or floating-point type.
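  // Numeric illustration: for float __x = .75f and __y = 0x1p23f
  // (= 8388608.f), __x + __y rounds to 8388609.f (the fractional bits of __x
  // are absorbed by the rounding) and subtracting __y again yields 1.f, i.e.
  // __x rounded according to the current rounding mode. Under
  // -fassociative-math the compiler may fold (__x + __y) - __y back to __x,
  // which is why the intermediate sum is hidden behind an empty asm below.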
2104  template <typename _TV, typename _UV>
2105  _GLIBCXX_SIMD_INTRINSIC static constexpr _TV _S_plus_minus(_TV __x,
2106  _UV __y) noexcept
2107  {
2108  #if defined __i386__ && !defined __SSE_MATH__
2109  if constexpr (sizeof(__x) == 8)
2110  { // operations on __x would use the FPU
2111  static_assert(is_same_v<_TV, __vector_type_t<float, 2>>);
2112  const auto __x4 = __vector_bitcast<float, 4>(__x);
2113  if constexpr (is_same_v<_TV, _UV>)
2114  return __vector_bitcast<float, 2>(
2115  _S_plus_minus(__x4, __vector_bitcast<float, 4>(__y)));
2116  else
2117  return __vector_bitcast<float, 2>(_S_plus_minus(__x4, __y));
2118  }
2119  #endif
2120  #if !defined __clang__ && __GCC_IEC_559 == 0
2121  if (__builtin_is_constant_evaluated()
2122  || (__builtin_constant_p(__x) && __builtin_constant_p(__y)))
2123  return (__x + __y) - __y;
2124  else
2125  return [&] {
2126  __x += __y;
2127  if constexpr(__have_sse)
2128  {
2129  if constexpr (sizeof(__x) >= 16)
2130  asm("" : "+x"(__x));
2131  else if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>)
2132  asm("" : "+x"(__x[0]), "+x"(__x[1]));
2133  else
2134  __assert_unreachable<_TV>();
2135  }
2136  else if constexpr(__have_neon)
2137  asm("" : "+w"(__x));
2138  else if constexpr (__have_power_vmx)
2139  {
2140  if constexpr (is_same_v<__vector_type_t<float, 2>, _TV>)
2141  asm("" : "+fgr"(__x[0]), "+fgr"(__x[1]));
2142  else
2143  asm("" : "+v"(__x));
2144  }
2145  else
2146  asm("" : "+g"(__x));
2147  return __x - __y;
2148  }();
2149  #else
2150  return (__x + __y) - __y;
2151  #endif
2152  }
2153 
2154  // }}}
2155  // _S_nearbyint {{{3
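  // The implementation below builds on _S_plus_minus: adding and subtracting
  // 2^(digits-1), with the sign of __x copied onto the shifter, drops the
  // fractional bits according to the current rounding mode. Inputs whose
  // magnitude reaches the shifter are already integral and returned
  // unchanged. E.g. for float (__shifter_abs = 0x1p23f) and __x = -2.5f the
  // add/subtract pair yields -2.f under the default round-to-nearest-even
  // mode.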
2156  template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2157  _GLIBCXX_SIMD_INTRINSIC static _Tp _S_nearbyint(_Tp __x_) noexcept
2158  {
2159  using value_type = typename _TVT::value_type;
2160  using _V = typename _TVT::type;
2161  const _V __x = __x_;
2162  const _V __absx = __and(__x, _S_absmask<_V>);
2163  static_assert(__CHAR_BIT__ * sizeof(1ull) >= __digits_v<value_type>);
2164  _GLIBCXX_SIMD_USE_CONSTEXPR _V __shifter_abs
2165  = _V() + (1ull << (__digits_v<value_type> - 1));
2166  const _V __shifter = __or(__and(_S_signmask<_V>, __x), __shifter_abs);
2167  const _V __shifted = _S_plus_minus(__x, __shifter);
2168  return __absx < __shifter_abs ? __shifted : __x;
2169  }
2170 
2171  // _S_rint {{{3
2172  template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
2173  _GLIBCXX_SIMD_INTRINSIC static _Tp _S_rint(_Tp __x) noexcept
2174  {
2175  return _SuperImpl::_S_nearbyint(__x);
2176  }
2177 
2178  // _S_trunc {{{3
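  // Same shifter trick as _S_nearbyint, applied to |__x| and followed by a
  // correction: where the rounding went up instead of down, 1 is subtracted
  // again, and the sign of __x is OR'd back in at the end. E.g. for
  // __x = 2.7f, _S_plus_minus(2.7f, 0x1p23f) gives 3.f, which is larger than
  // 2.7f, so the correction yields trunc(2.7f) == 2.f.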
2179  template <typename _Tp, size_t _Np>
2180  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2181  _S_trunc(_SimdWrapper<_Tp, _Np> __x)
2182  {
2183  using _V = __vector_type_t<_Tp, _Np>;
2184  const _V __absx = __and(__x._M_data, _S_absmask<_V>);
2185  static_assert(__CHAR_BIT__ * sizeof(1ull) >= __digits_v<_Tp>);
2186  constexpr _Tp __shifter = 1ull << (__digits_v<_Tp> - 1);
2187  _V __truncated = _S_plus_minus(__absx, __shifter);
2188  __truncated -= __truncated > __absx ? _V() + 1 : _V();
2189  return __absx < __shifter ? __or(__xor(__absx, __x._M_data), __truncated)
2190  : __x._M_data;
2191  }
2192 
2193  // _S_round {{{3
2194  template <typename _Tp, size_t _Np>
2195  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2196  _S_round(_SimdWrapper<_Tp, _Np> __x)
2197  {
2198  const auto __abs_x = _SuperImpl::_S_abs(__x);
2199  const auto __t_abs = _SuperImpl::_S_trunc(__abs_x)._M_data;
2200  const auto __r_abs // round(abs(x)) =
2201  = __t_abs + (__abs_x._M_data - __t_abs >= _Tp(.5) ? _Tp(1) : 0);
2202  return __or(__xor(__abs_x._M_data, __x._M_data), __r_abs);
2203  }
2204 
2205  // _S_floor {{{3
2206  template <typename _Tp, size_t _Np>
2207  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2208  _S_floor(_SimdWrapper<_Tp, _Np> __x)
2209  {
2210  const auto __y = _SuperImpl::_S_trunc(__x)._M_data;
2211  const auto __negative_input
2212  = __vector_bitcast<_Tp>(__x._M_data < __vector_broadcast<_Np, _Tp>(0));
2213  const auto __mask
2214  = __andnot(__vector_bitcast<_Tp>(__y == __x._M_data), __negative_input);
2215  return __or(__andnot(__mask, __y),
2216  __and(__mask, __y - __vector_broadcast<_Np, _Tp>(1)));
2217  }
2218 
2219  // _S_ceil {{{3
2220  template <typename _Tp, size_t _Np>
2221  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2222  _S_ceil(_SimdWrapper<_Tp, _Np> __x)
2223  {
2224  const auto __y = _SuperImpl::_S_trunc(__x)._M_data;
2225  const auto __negative_input
2226  = __vector_bitcast<_Tp>(__x._M_data < __vector_broadcast<_Np, _Tp>(0));
2227  const auto __inv_mask
2228  = __or(__vector_bitcast<_Tp>(__y == __x._M_data), __negative_input);
2229  return __or(__and(__inv_mask, __y),
2230  __andnot(__inv_mask, __y + __vector_broadcast<_Np, _Tp>(1)));
2231  }
2232 
2233  // _S_isnan {{{3
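  // Without signaling-NaN support, __x != __x (spelled ~(__x == __x) below)
  // identifies NaNs, because only NaN compares unequal to itself. When sNaNs
  // must be supported, the floating-point compare is avoided: |__x| is
  // compared as an integer against the bit pattern of infinity, and every
  // NaN has a strictly larger bit pattern once the sign bit is cleared.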
2234  template <typename _Tp, size_t _Np>
2235  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2236  _S_isnan([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x)
2237  {
2238  #if __FINITE_MATH_ONLY__
2239  return {}; // false
2240  #elif !defined __SUPPORT_SNAN__
2241  return ~(__x._M_data == __x._M_data);
2242  #elif defined __STDC_IEC_559__
2243  using _Ip = __int_for_sizeof_t<_Tp>;
2244  const auto __absn = __vector_bitcast<_Ip>(_SuperImpl::_S_abs(__x));
2245  const auto __infn
2246  = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__infinity_v<_Tp>));
2247  return __infn < __absn;
2248  #else
2249  #error "Not implemented: how to support SNaN but non-IEC559 floating-point?"
2250  #endif
2251  }
2252 
2253  // _S_isfinite {{{3
2254  template <typename _Tp, size_t _Np>
2255  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2256  _S_isfinite([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x)
2257  {
2258  #if __FINITE_MATH_ONLY__
2259  using _UV = typename _MaskMember<_Tp>::_BuiltinType;
2260  _GLIBCXX_SIMD_USE_CONSTEXPR _UV __alltrue = ~_UV();
2261  return __alltrue;
2262  #else
2263  // if all exponent bits are set, __x is either inf or NaN
2264  using _Ip = __int_for_sizeof_t<_Tp>;
2265  const auto __absn = __vector_bitcast<_Ip>(_SuperImpl::_S_abs(__x));
2266  const auto __maxn
2267  = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__finite_max_v<_Tp>));
2268  return __absn <= __maxn;
2269  #endif
2270  }
2271 
2272  // _S_isunordered {{{3
2273  template <typename _Tp, size_t _Np>
2274  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2275  _S_isunordered(_SimdWrapper<_Tp, _Np> __x, _SimdWrapper<_Tp, _Np> __y)
2276  {
2277  return __or(_S_isnan(__x), _S_isnan(__y));
2278  }
2279 
2280  // _S_signbit {{{3
2281  template <typename _Tp, size_t _Np>
2282  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2283  _S_signbit(_SimdWrapper<_Tp, _Np> __x)
2284  {
2285  using _Ip = __int_for_sizeof_t<_Tp>;
2286  return __vector_bitcast<_Ip>(__x) < 0;
2287  // Arithmetic right shift (SRA) would also work (instead of compare), but
2288  // 64-bit SRA isn't available on x86 before AVX512. And in general,
2289  // compares are more likely to be efficient than SRA.
2290  }
2291 
2292  // _S_isinf {{{3
2293  template <typename _Tp, size_t _Np>
2294  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2295  _S_isinf([[maybe_unused]] _SimdWrapper<_Tp, _Np> __x)
2296  {
2297  #if __FINITE_MATH_ONLY__
2298  return {}; // false
2299  #else
2300  return _SuperImpl::template _S_equal_to<_Tp, _Np>(_SuperImpl::_S_abs(__x),
2301  __vector_broadcast<_Np>(
2302  __infinity_v<_Tp>));
2303  // alternative:
2304  // compare to inf using the corresponding integer type
2305  /*
2306  return
2307  __vector_bitcast<_Tp>(__vector_bitcast<__int_for_sizeof_t<_Tp>>(
2308  _S_abs(__x)._M_data)
2309  ==
2310  __vector_bitcast<__int_for_sizeof_t<_Tp>>(__vector_broadcast<_Np>(
2311  __infinity_v<_Tp>)));
2312  */
2313  #endif
2314  }
2315 
2316  // _S_isnormal {{{3
2317  template <typename _Tp, size_t _Np>
2318  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2319  _S_isnormal(_SimdWrapper<_Tp, _Np> __x)
2320  {
2321  using _Ip = __int_for_sizeof_t<_Tp>;
2322  const auto __absn = __vector_bitcast<_Ip>(_SuperImpl::_S_abs(__x));
2323  const auto __minn
2324  = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__norm_min_v<_Tp>));
2325  #if __FINITE_MATH_ONLY__
2326  return __absn >= __minn;
2327  #else
2328  const auto __maxn
2329  = __vector_bitcast<_Ip>(__vector_broadcast<_Np>(__finite_max_v<_Tp>));
2330  return __minn <= __absn && __absn <= __maxn;
2331  #endif
2332  }
2333 
2334  // _S_fpclassify {{{3
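  // Sketch of the classification below: |__x|, viewed as an integer, is
  // compared against the bit patterns of the smallest normal value and of
  // infinity, selecting a vector of FP_ZERO/FP_SUBNORMAL/FP_NORMAL/
  // FP_INFINITE/FP_NAN constants lane by lane. The remaining branches only
  // repack that integer vector into the __fixed_size_storage_t<int, _Np>
  // return type.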
2335  template <typename _Tp, size_t _Np>
2336  _GLIBCXX_SIMD_INTRINSIC static __fixed_size_storage_t<int, _Np>
2337  _S_fpclassify(_SimdWrapper<_Tp, _Np> __x)
2338  {
2339  using _I = __int_for_sizeof_t<_Tp>;
2340  const auto __xn
2341  = __vector_bitcast<_I>(__to_intrin(_SuperImpl::_S_abs(__x)));
2342  constexpr size_t _NI = sizeof(__xn) / sizeof(_I);
2343  _GLIBCXX_SIMD_USE_CONSTEXPR auto __minn
2344  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__norm_min_v<_Tp>));
2345  _GLIBCXX_SIMD_USE_CONSTEXPR auto __infn
2346  = __vector_bitcast<_I>(__vector_broadcast<_NI>(__infinity_v<_Tp>));
2347 
2348  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_normal
2349  = __vector_broadcast<_NI, _I>(FP_NORMAL);
2350  #if !__FINITE_MATH_ONLY__
2351  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_nan
2352  = __vector_broadcast<_NI, _I>(FP_NAN);
2353  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_infinite
2354  = __vector_broadcast<_NI, _I>(FP_INFINITE);
2355  #endif
2356  #ifndef __FAST_MATH__
2357  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_subnormal
2358  = __vector_broadcast<_NI, _I>(FP_SUBNORMAL);
2359  #endif
2360  _GLIBCXX_SIMD_USE_CONSTEXPR auto __fp_zero
2361  = __vector_broadcast<_NI, _I>(FP_ZERO);
2362 
2363  __vector_type_t<_I, _NI>
2364  __tmp = __xn < __minn
2365  #ifdef __FAST_MATH__
2366  ? __fp_zero
2367  #else
2368  ? (__xn == 0 ? __fp_zero : __fp_subnormal)
2369  #endif
2370  #if __FINITE_MATH_ONLY__
2371  : __fp_normal;
2372  #else
2373  : (__xn < __infn ? __fp_normal
2374  : (__xn == __infn ? __fp_infinite : __fp_nan));
2375  #endif
2376 
2377  if constexpr (sizeof(_I) == sizeof(int))
2378  {
2379  using _FixedInt = __fixed_size_storage_t<int, _Np>;
2380  const auto __as_int = __vector_bitcast<int, _Np>(__tmp);
2381  if constexpr (_FixedInt::_S_tuple_size == 1)
2382  return {__as_int};
2383  else if constexpr (_FixedInt::_S_tuple_size == 2
2384  && is_same_v<
2385  typename _FixedInt::_SecondType::_FirstAbi,
2386  simd_abi::scalar>)
2387  return {__extract<0, 2>(__as_int), __as_int[_Np - 1]};
2388  else if constexpr (_FixedInt::_S_tuple_size == 2)
2389  return {__extract<0, 2>(__as_int),
2390  __auto_bitcast(__extract<1, 2>(__as_int))};
2391  else
2392  __assert_unreachable<_Tp>();
2393  }
2394  else if constexpr (_Np == 2 && sizeof(_I) == 8
2395  && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 2)
2396  {
2397  const auto __aslong = __vector_bitcast<_LLong>(__tmp);
2398  return {int(__aslong[0]), {int(__aslong[1])}};
2399  }
2400  #if _GLIBCXX_SIMD_X86INTRIN
2401  else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 32
2402  && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
2403  return {_mm_packs_epi32(__to_intrin(__lo128(__tmp)),
2404  __to_intrin(__hi128(__tmp)))};
2405  else if constexpr (sizeof(_Tp) == 8 && sizeof(__tmp) == 64
2406  && __fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
2407  return {_mm512_cvtepi64_epi32(__to_intrin(__tmp))};
2408  #endif // _GLIBCXX_SIMD_X86INTRIN
2409  else if constexpr (__fixed_size_storage_t<int, _Np>::_S_tuple_size == 1)
2410  return {__call_with_subscripts<_Np>(__vector_bitcast<_LLong>(__tmp),
2411  [](auto... __l) {
2412  return __make_wrapper<int>(__l...);
2413  })};
2414  else
2415  __assert_unreachable<_Tp>();
2416  }
2417 
2418  // _S_increment & _S_decrement{{{2
2419  template <typename _Tp, size_t _Np>
2420  _GLIBCXX_SIMD_INTRINSIC static void
2421  _S_increment(_SimdWrapper<_Tp, _Np>& __x)
2422  { __x = __x._M_data + 1; }
2423 
2424  template <typename _Tp, size_t _Np>
2425  _GLIBCXX_SIMD_INTRINSIC static void
2426  _S_decrement(_SimdWrapper<_Tp, _Np>& __x)
2427  { __x = __x._M_data - 1; }
2428 
2429  // smart_reference access {{{2
2430  template <typename _Tp, size_t _Np, typename _Up>
2431  _GLIBCXX_SIMD_INTRINSIC constexpr static void
2432  _S_set(_SimdWrapper<_Tp, _Np>& __v, int __i, _Up&& __x) noexcept
2433  { __v._M_set(__i, static_cast<_Up&&>(__x)); }
2434 
2435  // _S_masked_assign{{{2
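  // Both overloads below short-circuit when the mask is known at compile
  // time: a constant all-false mask is a no-op and a constant all-true mask
  // is a plain assignment. For a vector mask and a constant __rhs of 0, the
  // blend further degenerates to __lhs = __andnot(__k, __lhs), because the
  // mask lanes are either all-ones or all-zeros.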
2436  template <typename _Tp, typename _K, size_t _Np>
2437  _GLIBCXX_SIMD_INTRINSIC static void
2438  _S_masked_assign(_SimdWrapper<_K, _Np> __k, _SimdWrapper<_Tp, _Np>& __lhs,
2439  __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs)
2440  {
2441  if (__k._M_is_constprop_none_of())
2442  return;
2443  else if (__k._M_is_constprop_all_of())
2444  __lhs = __rhs;
2445  else
2446  __lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs);
2447  }
2448 
2449  template <typename _Tp, typename _K, size_t _Np>
2450  _GLIBCXX_SIMD_INTRINSIC static void
2451  _S_masked_assign(_SimdWrapper<_K, _Np> __k, _SimdWrapper<_Tp, _Np>& __lhs,
2452  __type_identity_t<_Tp> __rhs)
2453  {
2454  if (__k._M_is_constprop_none_of())
2455  return;
2456  else if (__k._M_is_constprop_all_of())
2457  __lhs = __vector_broadcast<_Np>(__rhs);
2458  else if (__builtin_constant_p(__rhs) && __rhs == 0)
2459  {
2460  if constexpr (!is_same_v<bool, _K>)
2461  // the __andnot optimization only makes sense if __k._M_data is a
2462  // vector register
2463  __lhs._M_data
2464  = __andnot(__vector_bitcast<_Tp>(__k), __lhs._M_data);
2465  else
2466  // for AVX512/__mmask, a _mm512_maskz_mov is best
2467  __lhs
2468  = _CommonImpl::_S_blend(__k, __lhs, _SimdWrapper<_Tp, _Np>());
2469  }
2470  else
2471  __lhs = _CommonImpl::_S_blend(__k, __lhs,
2472  _SimdWrapper<_Tp, _Np>(
2473  __vector_broadcast<_Np>(__rhs)));
2474  }
2475 
2476  // _S_masked_cassign {{{2
2477  template <typename _Op, typename _Tp, typename _K, size_t _Np>
2478  _GLIBCXX_SIMD_INTRINSIC static void
2479  _S_masked_cassign(const _SimdWrapper<_K, _Np> __k,
2480  _SimdWrapper<_Tp, _Np>& __lhs,
2481  const __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs,
2482  _Op __op)
2483  {
2484  if (__k._M_is_constprop_none_of())
2485  return;
2486  else if (__k._M_is_constprop_all_of())
2487  __lhs = __op(_SuperImpl{}, __lhs, __rhs);
2488  else
2489  __lhs = _CommonImpl::_S_blend(__k, __lhs,
2490  __op(_SuperImpl{}, __lhs, __rhs));
2491  }
2492 
2493  template <typename _Op, typename _Tp, typename _K, size_t _Np>
2494  _GLIBCXX_SIMD_INTRINSIC static void
2495  _S_masked_cassign(const _SimdWrapper<_K, _Np> __k,
2496  _SimdWrapper<_Tp, _Np>& __lhs,
2497  const __type_identity_t<_Tp> __rhs, _Op __op)
2498  { _S_masked_cassign(__k, __lhs, __vector_broadcast<_Np>(__rhs), __op); }
2499 
2500  // _S_masked_unary {{{2
2501  template <template <typename> class _Op, typename _Tp, typename _K,
2502  size_t _Np>
2503  _GLIBCXX_SIMD_INTRINSIC static _SimdWrapper<_Tp, _Np>
2504  _S_masked_unary(const _SimdWrapper<_K, _Np> __k,
2505  const _SimdWrapper<_Tp, _Np> __v)
2506  {
2507  if (__k._M_is_constprop_none_of())
2508  return __v;
2509  auto __vv = _M_make_simd(__v);
2510  _Op<decltype(__vv)> __op;
2511  if (__k._M_is_constprop_all_of())
2512  return __data(__op(__vv));
2513  else
2514  return _CommonImpl::_S_blend(__k, __v, __data(__op(__vv)));
2515  }
2516 
2517  //}}}2
2518  };
2519 
2520 // _MaskImplBuiltinMixin {{{1
2521 struct _MaskImplBuiltinMixin
2522 {
2523  template <typename _Tp>
2524  using _TypeTag = _Tp*;
2525 
2526  // _S_to_maskvector {{{
2527  template <typename _Up, size_t _ToN = 1>
2528  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN>
2529  _S_to_maskvector(bool __x)
2530  {
2531  static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
2532  return __x ? __vector_type_t<_Up, _ToN>{~_Up()}
2533  : __vector_type_t<_Up, _ToN>{};
2534  }
2535 
2536  template <typename _Up, size_t _UpN = 0, size_t _Np, bool _Sanitized,
2537  size_t _ToN = _UpN == 0 ? _Np : _UpN>
2538  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN>
2539  _S_to_maskvector(_BitMask<_Np, _Sanitized> __x)
2540  {
2541  static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
2542  return __generate_vector<__vector_type_t<_Up, _ToN>>([&](
2543  auto __i) constexpr {
2544  if constexpr (__i < _Np)
2545  return __x[__i] ? ~_Up() : _Up();
2546  else
2547  return _Up();
2548  });
2549  }
2550 
2551  template <typename _Up, size_t _UpN = 0, typename _Tp, size_t _Np,
2552  size_t _ToN = _UpN == 0 ? _Np : _UpN>
2553  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Up, _ToN>
2554  _S_to_maskvector(_SimdWrapper<_Tp, _Np> __x)
2555  {
2556  static_assert(is_same_v<_Up, __int_for_sizeof_t<_Up>>);
2557  using _TW = _SimdWrapper<_Tp, _Np>;
2558  using _UW = _SimdWrapper<_Up, _ToN>;
2559  if constexpr (sizeof(_Up) == sizeof(_Tp) && sizeof(_TW) == sizeof(_UW))
2560  return __wrapper_bitcast<_Up, _ToN>(__x);
2561  else if constexpr (is_same_v<_Tp, bool>) // bits -> vector
2562  return _S_to_maskvector<_Up, _ToN>(_BitMask<_Np>(__x._M_data));
2563  else
2564  { // vector -> vector
2565  /*
2566  [[maybe_unused]] const auto __y = __vector_bitcast<_Up>(__x._M_data);
2567  if constexpr (sizeof(_Tp) == 8 && sizeof(_Up) == 4 && sizeof(__y) ==
2568  16) return __vector_permute<1, 3, -1, -1>(__y); else if constexpr
2569  (sizeof(_Tp) == 4 && sizeof(_Up) == 2
2570  && sizeof(__y) == 16)
2571  return __vector_permute<1, 3, 5, 7, -1, -1, -1, -1>(__y);
2572  else if constexpr (sizeof(_Tp) == 8 && sizeof(_Up) == 2
2573  && sizeof(__y) == 16)
2574  return __vector_permute<3, 7, -1, -1, -1, -1, -1, -1>(__y);
2575  else if constexpr (sizeof(_Tp) == 2 && sizeof(_Up) == 1
2576  && sizeof(__y) == 16)
2577  return __vector_permute<1, 3, 5, 7, 9, 11, 13, 15, -1, -1, -1, -1,
2578  -1, -1, -1, -1>(__y); else if constexpr (sizeof(_Tp) == 4 &&
2579  sizeof(_Up) == 1
2580  && sizeof(__y) == 16)
2581  return __vector_permute<3, 7, 11, 15, -1, -1, -1, -1, -1, -1, -1,
2582  -1, -1, -1, -1, -1>(__y); else if constexpr (sizeof(_Tp) == 8 &&
2583  sizeof(_Up) == 1
2584  && sizeof(__y) == 16)
2585  return __vector_permute<7, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
2586  -1, -1, -1, -1, -1>(__y); else
2587  */
2588  {
2589  return __generate_vector<__vector_type_t<_Up, _ToN>>([&](
2590  auto __i) constexpr {
2591  if constexpr (__i < _Np)
2592  return _Up(__x[__i.value]);
2593  else
2594  return _Up();
2595  });
2596  }
2597  }
2598  }
2599 
2600  // }}}
2601  // _S_to_bits {{{
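  // Each mask lane holds either all ones or all zeros, so an unsigned shift
  // by (lane width - 1) leaves 1 or 0, which is then OR'd into bit __i of
  // the result. E.g. four int lanes {-1, 0, -1, -1} produce the bitmask
  // 0b1101.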
2602  template <typename _Tp, size_t _Np>
2603  _GLIBCXX_SIMD_INTRINSIC static constexpr _SanitizedBitMask<_Np>
2604  _S_to_bits(_SimdWrapper<_Tp, _Np> __x)
2605  {
2606  static_assert(!is_same_v<_Tp, bool>);
2607  static_assert(_Np <= __CHAR_BIT__ * sizeof(_ULLong));
2608  using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
2609  const auto __bools
2610  = __vector_bitcast<_Up>(__x) >> (sizeof(_Up) * __CHAR_BIT__ - 1);
2611  _ULLong __r = 0;
2612  __execute_n_times<_Np>(
2613  [&](auto __i) { __r |= _ULLong(__bools[__i.value]) << __i; });
2614  return __r;
2615  }
2616 
2617  // }}}
2618 };
2619 
2620 // _MaskImplBuiltin {{{1
2621 template <typename _Abi>
2622  struct _MaskImplBuiltin : _MaskImplBuiltinMixin
2623  {
2624  using _MaskImplBuiltinMixin::_S_to_bits;
2625  using _MaskImplBuiltinMixin::_S_to_maskvector;
2626 
2627  // member types {{{
2628  template <typename _Tp>
2629  using _SimdMember = typename _Abi::template __traits<_Tp>::_SimdMember;
2630 
2631  template <typename _Tp>
2632  using _MaskMember = typename _Abi::template _MaskMember<_Tp>;
2633 
2634  using _SuperImpl = typename _Abi::_MaskImpl;
2635  using _CommonImpl = typename _Abi::_CommonImpl;
2636 
2637  template <typename _Tp>
2638  static constexpr size_t _S_size = simd_size_v<_Tp, _Abi>;
2639 
2640  // }}}
2641  // _S_broadcast {{{
2642  template <typename _Tp>
2643  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
2644  _S_broadcast(bool __x)
2645  {
2646  return __x ? _Abi::template _S_implicit_mask<_Tp>()
2647  : _MaskMember<_Tp>();
2648  }
2649 
2650  // }}}
2651  // _S_load {{{
2652  template <typename _Tp>
2653  _GLIBCXX_SIMD_INTRINSIC static constexpr _MaskMember<_Tp>
2654  _S_load(const bool* __mem)
2655  {
2656  using _I = __int_for_sizeof_t<_Tp>;
2657  if constexpr (sizeof(_Tp) == sizeof(bool))
2658  {
2659  const auto __bools
2660  = _CommonImpl::template _S_load<_I, _S_size<_Tp>>(__mem);
2661  // bool is {0, 1}, everything else is UB
2662  return __bools > 0;
2663  }
2664  else
2665  return __generate_vector<_I, _S_size<_Tp>>([&](auto __i) constexpr {
2666  return __mem[__i] ? ~_I() : _I();
2667  });
2668  }
2669 
2670  // }}}
2671  // _S_convert {{{
2672  template <typename _Tp, size_t _Np, bool _Sanitized>
2673  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
2674  _S_convert(_BitMask<_Np, _Sanitized> __x)
2675  {
2676  if constexpr (__is_builtin_bitmask_abi<_Abi>())
2677  return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(__x._M_to_bits());
2678  else
2679  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
2680  _S_size<_Tp>>(
2681  __x._M_sanitized());
2682  }
2683 
2684  template <typename _Tp, size_t _Np>
2685  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
2686  _S_convert(_SimdWrapper<bool, _Np> __x)
2687  {
2688  if constexpr (__is_builtin_bitmask_abi<_Abi>())
2689  return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(__x._M_data);
2690  else
2691  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
2692  _S_size<_Tp>>(
2693  _BitMask<_Np>(__x._M_data)._M_sanitized());
2694  }
2695 
2696  template <typename _Tp, typename _Up, size_t _Np>
2697  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
2698  _S_convert(_SimdWrapper<_Up, _Np> __x)
2699  {
2700  if constexpr (__is_builtin_bitmask_abi<_Abi>())
2701  return _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>(
2702  _SuperImpl::_S_to_bits(__x));
2703  else
2704  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
2705  _S_size<_Tp>>(__x);
2706  }
2707 
2708  template <typename _Tp, typename _Up, typename _UAbi>
2709  _GLIBCXX_SIMD_INTRINSIC static constexpr auto
2710  _S_convert(simd_mask<_Up, _UAbi> __x)
2711  {
2712  if constexpr (__is_builtin_bitmask_abi<_Abi>())
2713  {
2714  using _R = _SimdWrapper<bool, simd_size_v<_Tp, _Abi>>;
2715  if constexpr (__is_builtin_bitmask_abi<_UAbi>()) // bits -> bits
2716  return _R(__data(__x));
2717  else if constexpr (__is_scalar_abi<_UAbi>()) // bool -> bits
2718  return _R(__data(__x));
2719  else if constexpr (__is_fixed_size_abi_v<_UAbi>) // bitset -> bits
2720  return _R(__data(__x)._M_to_bits());
2721  else // vector -> bits
2722  return _R(_UAbi::_MaskImpl::_S_to_bits(__data(__x))._M_to_bits());
2723  }
2724  else
2725  return _SuperImpl::template _S_to_maskvector<__int_for_sizeof_t<_Tp>,
2726  _S_size<_Tp>>(
2727  __data(__x));
2728  }
2729 
2730  // }}}
2731  // _S_masked_load {{{2
2732  template <typename _Tp, size_t _Np>
2733  static inline _SimdWrapper<_Tp, _Np>
2734  _S_masked_load(_SimdWrapper<_Tp, _Np> __merge,
2735  _SimdWrapper<_Tp, _Np> __mask, const bool* __mem) noexcept
2736  {
2737  // AVX(2) has 32/64 bit maskload, but nothing at 8 bit granularity
2738  auto __tmp = __wrapper_bitcast<__int_for_sizeof_t<_Tp>>(__merge);
2739  _BitOps::_S_bit_iteration(_SuperImpl::_S_to_bits(__mask),
2740  [&](auto __i) {
2741  __tmp._M_set(__i, -__mem[__i]);
2742  });
2743  __merge = __wrapper_bitcast<_Tp>(__tmp);
2744  return __merge;
2745  }
2746 
2747  // _S_store {{{2
2748  template <typename _Tp, size_t _Np>
2749  _GLIBCXX_SIMD_INTRINSIC static void _S_store(_SimdWrapper<_Tp, _Np> __v,
2750  bool* __mem) noexcept
2751  {
2752  __execute_n_times<_Np>([&](auto __i) constexpr {
2753  __mem[__i] = __v[__i];
2754  });
2755  }
2756 
2757  // _S_masked_store {{{2
2758  template <typename _Tp, size_t _Np>
2759  static inline void
2760  _S_masked_store(const _SimdWrapper<_Tp, _Np> __v, bool* __mem,
2761  const _SimdWrapper<_Tp, _Np> __k) noexcept
2762  {
2763  _BitOps::_S_bit_iteration(
2764  _SuperImpl::_S_to_bits(__k), [&](auto __i) constexpr {
2765  __mem[__i] = __v[__i];
2766  });
2767  }
2768 
2769  // _S_from_bitmask{{{2
2770  template <size_t _Np, typename _Tp>
2771  _GLIBCXX_SIMD_INTRINSIC static _MaskMember<_Tp>
2772  _S_from_bitmask(_SanitizedBitMask<_Np> __bits, _TypeTag<_Tp>)
2773  {
2774  return _SuperImpl::template _S_to_maskvector<_Tp, _S_size<_Tp>>(__bits);
2775  }
2776 
2777  // logical and bitwise operators {{{2
2778  template <typename _Tp, size_t _Np>
2779  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2780  _S_logical_and(const _SimdWrapper<_Tp, _Np>& __x,
2781  const _SimdWrapper<_Tp, _Np>& __y)
2782  { return __and(__x._M_data, __y._M_data); }
2783 
2784  template <typename _Tp, size_t _Np>
2785  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2786  _S_logical_or(const _SimdWrapper<_Tp, _Np>& __x,
2787  const _SimdWrapper<_Tp, _Np>& __y)
2788  { return __or(__x._M_data, __y._M_data); }
2789 
2790  template <typename _Tp, size_t _Np>
2791  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2792  _S_bit_not(const _SimdWrapper<_Tp, _Np>& __x)
2793  {
2794  if constexpr (_Abi::template _S_is_partial<_Tp>)
2795  return __andnot(__x, __wrapper_bitcast<_Tp>(
2796  _Abi::template _S_implicit_mask<_Tp>()));
2797  else
2798  return __not(__x._M_data);
2799  }
2800 
2801  template <typename _Tp, size_t _Np>
2802  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2803  _S_bit_and(const _SimdWrapper<_Tp, _Np>& __x,
2804  const _SimdWrapper<_Tp, _Np>& __y)
2805  { return __and(__x._M_data, __y._M_data); }
2806 
2807  template <typename _Tp, size_t _Np>
2808  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2809  _S_bit_or(const _SimdWrapper<_Tp, _Np>& __x,
2810  const _SimdWrapper<_Tp, _Np>& __y)
2811  { return __or(__x._M_data, __y._M_data); }
2812 
2813  template <typename _Tp, size_t _Np>
2814  _GLIBCXX_SIMD_INTRINSIC static constexpr _SimdWrapper<_Tp, _Np>
2815  _S_bit_xor(const _SimdWrapper<_Tp, _Np>& __x,
2816  const _SimdWrapper<_Tp, _Np>& __y)
2817  { return __xor(__x._M_data, __y._M_data); }
2818 
2819  // smart_reference access {{{2
2820  template <typename _Tp, size_t _Np>
2821  static constexpr void _S_set(_SimdWrapper<_Tp, _Np>& __k, int __i,
2822  bool __x) noexcept
2823  {
2824  if constexpr (is_same_v<_Tp, bool>)
2825  __k._M_set(__i, __x);
2826  else
2827  {
2828  static_assert(is_same_v<_Tp, __int_for_sizeof_t<_Tp>>);
2829  if (__builtin_is_constant_evaluated())
2830  {
2831  __k = __generate_from_n_evaluations<_Np,
2832  __vector_type_t<_Tp, _Np>>(
2833  [&](auto __j) {
2834  if (__i == __j)
2835  return _Tp(-__x);
2836  else
2837  return __k[+__j];
2838  });
2839  }
2840  else
2841  __k._M_data[__i] = -__x;
2842  }
2843  }
2844 
2845  // _S_masked_assign{{{2
2846  template <typename _Tp, size_t _Np>
2847  _GLIBCXX_SIMD_INTRINSIC static void
2848  _S_masked_assign(_SimdWrapper<_Tp, _Np> __k,
2849  _SimdWrapper<_Tp, _Np>& __lhs,
2850  __type_identity_t<_SimdWrapper<_Tp, _Np>> __rhs)
2851  { __lhs = _CommonImpl::_S_blend(__k, __lhs, __rhs); }
2852 
2853  template <typename _Tp, size_t _Np>
2854  _GLIBCXX_SIMD_INTRINSIC static void
2855  _S_masked_assign(_SimdWrapper<_Tp, _Np> __k,
2856  _SimdWrapper<_Tp, _Np>& __lhs, bool __rhs)
2857  {
2858  if (__builtin_constant_p(__rhs))
2859  {
2860  if (__rhs == false)
2861  __lhs = __andnot(__k, __lhs);
2862  else
2863  __lhs = __or(__k, __lhs);
2864  return;
2865  }
2866  __lhs = _CommonImpl::_S_blend(__k, __lhs,
2867  __data(simd_mask<_Tp, _Abi>(__rhs)));
2868  }
2869 
2870  //}}}2
2871  // _S_all_of {{{
2872  template <typename _Tp>
2873  _GLIBCXX_SIMD_INTRINSIC static bool
2874  _S_all_of(simd_mask<_Tp, _Abi> __k)
2875  {
2876  return __call_with_subscripts(
2877  __data(__k), make_index_sequence<_S_size<_Tp>>(),
2878  [](const auto... __ent) constexpr { return (... && !(__ent == 0)); });
2879  }
2880 
2881  // }}}
2882  // _S_any_of {{{
2883  template <typename _Tp>
2884  _GLIBCXX_SIMD_INTRINSIC static bool
2885  _S_any_of(simd_mask<_Tp, _Abi> __k)
2886  {
2887  return __call_with_subscripts(
2888  __data(__k), make_index_sequence<_S_size<_Tp>>(),
2889  [](const auto... __ent) constexpr { return (... || !(__ent == 0)); });
2890  }
2891 
2892  // }}}
2893  // _S_none_of {{{
2894  template <typename _Tp>
2895  _GLIBCXX_SIMD_INTRINSIC static bool
2896  _S_none_of(simd_mask<_Tp, _Abi> __k)
2897  {
2898  return __call_with_subscripts(
2899  __data(__k), make_index_sequence<_S_size<_Tp>>(),
2900  [](const auto... __ent) constexpr { return (... && (__ent == 0)); });
2901  }
2902 
2903  // }}}
2904  // _S_some_of {{{
2905  template <typename _Tp>
2906  _GLIBCXX_SIMD_INTRINSIC static bool
2907  _S_some_of(simd_mask<_Tp, _Abi> __k)
2908  {
2909  const int __n_true = _SuperImpl::_S_popcount(__k);
2910  return __n_true > 0 && __n_true < int(_S_size<_Tp>);
2911  }
2912 
2913  // }}}
2914  // _S_popcount {{{
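  // True lanes are -1 when the mask is reinterpreted as signed integers, so
  // reduce() sums to minus the number of true lanes and the negation below
  // restores the count. E.g. {-1, 0, -1, -1} sums to -3, i.e. a popcount
  // of 3.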
2915  template <typename _Tp>
2916  _GLIBCXX_SIMD_INTRINSIC static int
2917  _S_popcount(simd_mask<_Tp, _Abi> __k)
2918  {
2919  using _I = __int_for_sizeof_t<_Tp>;
2920  if constexpr (is_default_constructible_v<simd<_I, _Abi>>)
2921  return -reduce(
2922  simd<_I, _Abi>(__private_init, __wrapper_bitcast<_I>(__data(__k))));
2923  else
2924  return -reduce(__bit_cast<rebind_simd_t<_I, simd<_Tp, _Abi>>>(
2925  simd<_Tp, _Abi>(__private_init, __data(__k))));
2926  }
2927 
2928  // }}}
2929  // _S_find_first_set {{{
2930  template <typename _Tp>
2931  _GLIBCXX_SIMD_INTRINSIC static int
2932  _S_find_first_set(simd_mask<_Tp, _Abi> __k)
2933  {
2934  return std::__countr_zero(
2935  _SuperImpl::_S_to_bits(__data(__k))._M_to_bits());
2936  }
2937 
2938  // }}}
2939  // _S_find_last_set {{{
2940  template <typename _Tp>
2941  _GLIBCXX_SIMD_INTRINSIC static int
2942  _S_find_last_set(simd_mask<_Tp, _Abi> __k)
2943  {
2944  return std::__bit_width(
2945  _SuperImpl::_S_to_bits(__data(__k))._M_to_bits()) - 1;
2946  }
2947 
2948  // }}}
2949  };
2950 
2951 //}}}1
2952 _GLIBCXX_SIMD_END_NAMESPACE
2953 #endif // __cplusplus >= 201703L
2954 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_
2955 
2956 // vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=80