17#ifndef dealii_vectorization_h
18#define dealii_vectorization_h
44#if DEAL_II_VECTORIZATION_WIDTH_IN_BITS > 0
53# if DEAL_II_VECTORIZATION_WIDTH_IN_BITS >= 256 && !defined(__AVX__)
55 "Mismatch in vectorization capabilities: AVX was detected during configuration of deal.II and switched on, but it is apparently not available for the file you are trying to compile at the moment. Check compilation flags controlling the instruction set, such as -march=native."
57# if DEAL_II_VECTORIZATION_WIDTH_IN_BITS >= 512 && !defined(__AVX512F__)
59 "Mismatch in vectorization capabilities: AVX-512F was detected during configuration of deal.II and switched on, but it is apparently not available for the file you are trying to compile at the moment. Check compilation flags controlling the instruction set, such as -march=native."
64# elif defined(__ALTIVEC__)
73# include <x86intrin.h>
85template <
typename Number, std::
size_t w
idth>
119 "You are trying to compare iterators into different arrays."));
120 return this->
lane == other.lane;
131 "You are trying to compare iterators into different arrays."));
132 return this->
lane != other.lane;
145 const typename T::value_type &
157 template <
typename U = T>
158 std::enable_if_t<!std::is_same<U, const U>::value,
typename T::value_type> &
201 "You can't decrement an iterator that is already at the beginning of the range."));
222 return static_cast<std::ptrdiff_t
>(
lane) -
223 static_cast<ptrdiff_t
>(
other.lane);
249template <
typename T, std::
size_t w
idth>
261 template <
typename U>
272 "Initializer list exceeds size of this VectorizedArray object."));
277 for (;
i0 != this->
end(); ++
i0)
286 static constexpr std::size_t
418template <
typename Number, std::
size_t w
idth>
428 static_assert(width == 1,
429 "You specified an illegal width that is not supported.");
448 template <
typename U>
548 template <
typename OtherNumber>
561 template <
typename OtherNumber>
731 template <
typename Number2, std::
size_t w
idth2>
734 template <
typename Number2, std::
size_t w
idth2>
737 template <
typename Number2, std::
size_t w
idth2>
741 template <
typename Number2, std::
size_t w
idth2>
760template <
typename Number,
778template <
typename VectorizedArrayType>
783 std::is_same<VectorizedArrayType,
785 VectorizedArrayType::size()>>::value,
786 "VectorizedArrayType is not a VectorizedArray.");
788 VectorizedArrayType
result =
u;
805template <
typename Number, std::
size_t w
idth>
808 const std::array<Number *, width> &
ptrs,
809 const unsigned int offset)
811 for (
unsigned int v = 0; v < width; ++v)
842template <
typename Number, std::
size_t w
idth>
849 for (
unsigned int i = 0; i <
n_entries; ++i)
851 out[i][v] = in[
offsets[v] + i];
866template <
typename Number, std::
size_t w
idth>
869 const std::array<Number *, width> &in,
872 for (
unsigned int i = 0; i <
n_entries; ++i)
874 out[i][v] = in[v][i];
917template <
typename Number, std::
size_t w
idth>
926 for (
unsigned int i = 0; i <
n_entries; ++i)
928 out[
offsets[v] + i] += in[i][v];
930 for (
unsigned int i = 0; i <
n_entries; ++i)
932 out[
offsets[v] + i] = in[i][v];
947template <
typename Number, std::
size_t w
idth>
952 std::array<Number *, width> & out)
955 for (
unsigned int i = 0; i <
n_entries; ++i)
957 out[v][i] += in[i][v];
959 for (
unsigned int i = 0; i <
n_entries; ++i)
961 out[v][i] = in[i][v];
969# if DEAL_II_VECTORIZATION_WIDTH_IN_BITS >= 128 && defined(__SSE2__)
1001 template <
typename U>
1023 operator=(
const double scalar) && =
delete;
1033 return *(
reinterpret_cast<double *
>(&
data) +
comp);
1044 return *(
reinterpret_cast<const double *
>(&
data) +
comp);
1054# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
1069# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
1084# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
1099# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
1114 load(
const double *ptr)
1121 load(
const float *ptr)
1124 for (
unsigned int i = 0; i < 2; ++i)
1136 store(
double *ptr)
const
1143 store(
float *ptr)
const
1146 for (
unsigned int i = 0; i < 2; ++i)
1158 Assert(
reinterpret_cast<std::size_t
>(ptr) % 16 == 0,
1179 for (
unsigned int i = 0; i < 2; ++i)
1199 for (
unsigned int i = 0; i < 2; ++i)
1281 template <
typename Number2, std::
size_t w
idth2>
1284 template <
typename Number2, std::
size_t w
idth2>
1287 template <
typename Number2, std::
size_t w
idth2>
1291 template <
typename Number2, std::
size_t w
idth2>
1309 const unsigned int n_chunks =
n_entries / 2;
1310 for (
unsigned int i = 0; i < n_chunks; ++i)
1319 for (
unsigned int i = 2 * n_chunks; i <
n_entries; ++i)
1320 for (
unsigned int v = 0; v < 2; ++v)
1321 out[i][v] = in[
offsets[v] + i];
1332 const std::array<double *, 2> &in,
1337 const unsigned int n_chunks =
n_entries / 2;
1338 for (
unsigned int i = 0; i < n_chunks; ++i)
1346 for (
unsigned int i = 2 * n_chunks; i <
n_entries; ++i)
1347 for (
unsigned int v = 0; v < 2; ++v)
1348 out[i][v] = in[v][i];
1364 const unsigned int n_chunks =
n_entries / 2;
1367 for (
unsigned int i = 0; i < n_chunks; ++i)
1381 for (
unsigned int i = 2 * n_chunks; i <
n_entries; ++i)
1382 for (
unsigned int v = 0; v < 2; ++v)
1383 out[
offsets[v] + i] += in[i][v];
1387 for (
unsigned int i = 0; i < n_chunks; ++i)
1397 for (
unsigned int i = 2 * n_chunks; i <
n_entries; ++i)
1398 for (
unsigned int v = 0; v < 2; ++v)
1399 out[
offsets[v] + i] = in[i][v];
1413 std::array<double *, 2> & out)
1417 const unsigned int n_chunks =
n_entries / 2;
1420 for (
unsigned int i = 0; i < n_chunks; ++i)
1432 for (
unsigned int i = 2 * n_chunks; i <
n_entries; ++i)
1433 for (
unsigned int v = 0; v < 2; ++v)
1434 out[v][i] += in[i][v];
1438 for (
unsigned int i = 0; i < n_chunks; ++i)
1448 for (
unsigned int i = 2 * n_chunks; i <
n_entries; ++i)
1449 for (
unsigned int v = 0; v < 2; ++v)
1450 out[v][i] = in[i][v];
1490 template <
typename U>
1509 operator=(
const float scalar) && =
delete;
1519 return *(
reinterpret_cast<float *
>(&
data) +
comp);
1530 return *(
reinterpret_cast<const float *
>(&
data) +
comp);
1540# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
1555# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
1570# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
1585# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
1600 load(
const float *ptr)
1613 store(
float *ptr)
const
1626 Assert(
reinterpret_cast<std::size_t
>(ptr) % 16 == 0,
1647 for (
unsigned int i = 0; i < 4; ++i)
1667 for (
unsigned int i = 0; i < 4; ++i)
1750 template <
typename Number2, std::
size_t w
idth2>
1753 template <
typename Number2, std::
size_t w
idth2>
1756 template <
typename Number2, std::
size_t w
idth2>
1760 template <
typename Number2, std::
size_t w
idth2>
1778 const unsigned int n_chunks =
n_entries / 4;
1779 for (
unsigned int i = 0; i < n_chunks; ++i)
1796 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
1797 for (
unsigned int v = 0; v < 4; ++v)
1798 out[i][v] = in[
offsets[v] + i];
1809 const std::array<float *, 4> &in,
1814 const unsigned int n_chunks =
n_entries / 4;
1815 for (
unsigned int i = 0; i < n_chunks; ++i)
1831 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
1832 for (
unsigned int v = 0; v < 4; ++v)
1833 out[i][v] = in[v][i];
1849 const unsigned int n_chunks =
n_entries / 4;
1850 for (
unsigned int i = 0; i < n_chunks; ++i)
1890 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
1891 for (
unsigned int v = 0; v < 4; ++v)
1892 out[
offsets[v] + i] += in[i][v];
1894 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
1895 for (
unsigned int v = 0; v < 4; ++v)
1896 out[
offsets[v] + i] = in[i][v];
1909 std::array<float *, 4> & out)
1913 const unsigned int n_chunks =
n_entries / 4;
1914 for (
unsigned int i = 0; i < n_chunks; ++i)
1950 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
1951 for (
unsigned int v = 0; v < 4; ++v)
1952 out[v][i] += in[i][v];
1954 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
1955 for (
unsigned int v = 0; v < 4; ++v)
1956 out[v][i] = in[i][v];
1963# if DEAL_II_VECTORIZATION_WIDTH_IN_BITS >= 256 && defined(__AVX__)
1995 template <
typename U>
2017 operator=(
const double scalar) && =
delete;
2027 return *(
reinterpret_cast<double *
>(&
data) +
comp);
2038 return *(
reinterpret_cast<const double *
>(&
data) +
comp);
2053# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
2068# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
2082# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
2097# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
2112 load(
const double *ptr)
2119 load(
const float *ptr)
2132 store(
double *ptr)
const
2139 store(
float *ptr)
const
2152 Assert(
reinterpret_cast<std::size_t
>(ptr) % 32 == 0,
2189 for (
unsigned int i = 0; i < 4; ++i)
2211 for (
unsigned int i = 0; i < 4; ++i)
2312 template <
typename Number2, std::
size_t w
idth2>
2315 template <
typename Number2, std::
size_t w
idth2>
2318 template <
typename Number2, std::
size_t w
idth2>
2322 template <
typename Number2, std::
size_t w
idth2>
2340 const unsigned int n_chunks =
n_entries / 4;
2346 for (
unsigned int i = 0; i < n_chunks; ++i)
2363 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
2375 const std::array<double *, 4> &in,
2380 const unsigned int n_chunks =
n_entries / 4;
2381 const double *
in0 = in[0];
2382 const double *
in1 = in[1];
2383 const double *
in2 = in[2];
2384 const double *
in3 = in[3];
2386 for (
unsigned int i = 0; i < n_chunks; ++i)
2402 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
2419 const unsigned int n_chunks =
n_entries / 4;
2424 for (
unsigned int i = 0; i < n_chunks; ++i)
2464 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
2465 for (
unsigned int v = 0; v < 4; ++v)
2466 out[
offsets[v] + i] += in[i][v];
2468 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
2469 for (
unsigned int v = 0; v < 4; ++v)
2470 out[
offsets[v] + i] = in[i][v];
2483 std::array<double *, 4> & out)
2487 const unsigned int n_chunks =
n_entries / 4;
2488 double *
out0 = out[0];
2489 double *
out1 = out[1];
2490 double *
out2 = out[2];
2491 double *
out3 = out[3];
2492 for (
unsigned int i = 0; i < n_chunks; ++i)
2532 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
2533 for (
unsigned int v = 0; v < 4; ++v)
2534 out[v][i] += in[i][v];
2536 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
2537 for (
unsigned int v = 0; v < 4; ++v)
2538 out[v][i] = in[i][v];
2573 template <
typename U>
2595 operator=(
const float scalar) && =
delete;
2605 return *(
reinterpret_cast<float *
>(&
data) +
comp);
2616 return *(
reinterpret_cast<const float *
>(&
data) +
comp);
2631# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
2646# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
2660# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
2675# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
2690 load(
const float *ptr)
2703 store(
float *ptr)
const
2716 Assert(
reinterpret_cast<std::size_t
>(ptr) % 32 == 0,
2753 for (
unsigned int i = 0; i < 8; ++i)
2775 for (
unsigned int i = 0; i < 8; ++i)
2876 template <
typename Number2, std::
size_t w
idth2>
2879 template <
typename Number2, std::
size_t w
idth2>
2882 template <
typename Number2, std::
size_t w
idth2>
2886 template <
typename Number2, std::
size_t w
idth2>
2904 const unsigned int n_chunks =
n_entries / 4;
2905 for (
unsigned int i = 0; i < n_chunks; ++i)
2930 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
2942 const std::array<float *, 8> &in,
2947 const unsigned int n_chunks =
n_entries / 4;
2948 for (
unsigned int i = 0; i < n_chunks; ++i)
2970 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
2987 const unsigned int n_chunks =
n_entries / 4;
2988 for (
unsigned int i = 0; i < n_chunks; ++i)
3048 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
3049 for (
unsigned int v = 0; v < 8; ++v)
3050 out[
offsets[v] + i] += in[i][v];
3052 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
3053 for (
unsigned int v = 0; v < 8; ++v)
3054 out[
offsets[v] + i] = in[i][v];
3067 std::array<float *, 8> & out)
3071 const unsigned int n_chunks =
n_entries / 4;
3072 for (
unsigned int i = 0; i < n_chunks; ++i)
3128 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
3129 for (
unsigned int v = 0; v < 8; ++v)
3130 out[v][i] += in[i][v];
3132 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
3133 for (
unsigned int v = 0; v < 8; ++v)
3134 out[v][i] = in[i][v];
3142# if DEAL_II_VECTORIZATION_WIDTH_IN_BITS >= 512 && defined(__AVX512F__)
3174 template <
typename U>
3197 operator=(
const double scalar) && =
delete;
3207 return *(
reinterpret_cast<double *
>(&
data) +
comp);
3218 return *(
reinterpret_cast<const double *
>(&
data) +
comp);
3233# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
3248# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
3262# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
3277# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
3292 load(
const double *ptr)
3299 load(
const float *ptr)
3312 store(
double *ptr)
const
3319 store(
float *ptr)
const
3332 Assert(
reinterpret_cast<std::size_t
>(ptr) % 64 == 0,
3385 for (
unsigned int i = 0; i < 8; ++i)
3386 for (
unsigned int j = i + 1;
j < 8; ++
j)
3388 ExcMessage(
"Result of scatter undefined if two offset elements"
3389 " point to the same position"));
3501 template <
typename Number2, std::
size_t w
idth2>
3504 template <
typename Number2, std::
size_t w
idth2>
3507 template <
typename Number2, std::
size_t w
idth2>
3511 template <
typename Number2, std::
size_t w
idth2>
3534 const unsigned int n_chunks =
n_entries / 4;
3535 for (
unsigned int i = 0; i < n_chunks; ++i)
3558 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
3570 const std::array<double *, 8> &in,
3573 const unsigned int n_chunks =
n_entries / 4;
3574 for (
unsigned int i = 0; i < n_chunks; ++i)
3597 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
3616 const unsigned int n_chunks =
n_entries / 4;
3619 for (
unsigned int i = 0; i < n_chunks; ++i)
3675 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
3676 for (
unsigned int v = 0; v < 8; ++v)
3677 out[
offsets[v] + i] += in[i][v];
3679 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
3680 for (
unsigned int v = 0; v < 8; ++v)
3681 out[
offsets[v] + i] = in[i][v];
3694 std::array<double *, 8> & out)
3698 const unsigned int n_chunks =
n_entries / 4;
3701 for (
unsigned int i = 0; i < n_chunks; ++i)
3753 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
3754 for (
unsigned int v = 0; v < 8; ++v)
3755 out[v][i] += in[i][v];
3757 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
3758 for (
unsigned int v = 0; v < 8; ++v)
3759 out[v][i] = in[i][v];
3794 template <
typename U>
3816 operator=(
const float scalar) && =
delete;
3826 return *(
reinterpret_cast<float *
>(&
data) +
comp);
3837 return *(
reinterpret_cast<const float *
>(&
data) +
comp);
3852# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
3867# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
3881# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
3896# ifdef DEAL_II_COMPILER_USE_VECTOR_ARITHMETICS
3911 load(
const float *ptr)
3924 store(
float *ptr)
const
3937 Assert(
reinterpret_cast<std::size_t
>(ptr) % 64 == 0,
3990 for (
unsigned int i = 0; i < 16; ++i)
3991 for (
unsigned int j = i + 1;
j < 16; ++
j)
3993 ExcMessage(
"Result of scatter undefined if two offset elements"
3994 " point to the same position"));
4106 template <
typename Number2, std::
size_t w
idth2>
4109 template <
typename Number2, std::
size_t w
idth2>
4112 template <
typename Number2, std::
size_t w
idth2>
4116 template <
typename Number2, std::
size_t w
idth2>
4138 const unsigned int n_chunks =
n_entries / 4;
4149 for (
unsigned int i = 0; i < n_chunks; ++i)
4180 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
4192 const std::array<float *, 16> &in,
4197 const unsigned int n_chunks =
n_entries / 4;
4202 for (
unsigned int i = 0; i < n_chunks; ++i)
4232 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
4249 const unsigned int n_chunks =
n_entries / 4;
4250 for (
unsigned int i = 0; i < n_chunks; ++i)
4341 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
4342 for (
unsigned int v = 0; v < 16; ++v)
4343 out[
offsets[v] + i] += in[i][v];
4345 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
4346 for (
unsigned int v = 0; v < 16; ++v)
4347 out[
offsets[v] + i] = in[i][v];
4360 std::array<float *, 16> & out)
4364 const unsigned int n_chunks =
n_entries / 4;
4365 for (
unsigned int i = 0; i < n_chunks; ++i)
4452 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
4453 for (
unsigned int v = 0; v < 16; ++v)
4454 out[v][i] += in[i][v];
4456 for (
unsigned int i = 4 * n_chunks; i <
n_entries; ++i)
4457 for (
unsigned int v = 0; v < 16; ++v)
4458 out[v][i] = in[i][v];
4463# if DEAL_II_VECTORIZATION_WIDTH_IN_BITS >= 128 && defined(__ALTIVEC__) && \
4493 template <
typename U>
4520 operator=(
const double scalar) && =
delete;
4530 return *(
reinterpret_cast<double *
>(&
data) +
comp);
4541 return *(
reinterpret_cast<const double *
>(&
data) +
comp);
4594 load(
const double *ptr)
4605 store(
double *ptr)
const
4627 for (
unsigned int i = 0; i < 2; ++i)
4638 for (
unsigned int i = 0; i < 2; ++i)
4703 template <
typename Number2, std::
size_t w
idth2>
4706 template <
typename Number2, std::
size_t w
idth2>
4709 template <
typename Number2, std::
size_t w
idth2>
4713 template <
typename Number2, std::
size_t w
idth2>
4748 template <
typename U>
4775 operator=(
const float scalar) && =
delete;
4785 return *(
reinterpret_cast<float *
>(&
data) +
comp);
4796 return *(
reinterpret_cast<const float *
>(&
data) +
comp);
4849 load(
const float *ptr)
4860 store(
float *ptr)
const
4882 for (
unsigned int i = 0; i < 4; ++i)
4893 for (
unsigned int i = 0; i < 4; ++i)
4958 template <
typename Number2, std::
size_t w
idth2>
4961 template <
typename Number2, std::
size_t w
idth2>
4964 template <
typename Number2, std::
size_t w
idth2>
4968 template <
typename Number2, std::
size_t w
idth2>
4992template <
typename Number, std::
size_t w
idth>
5010template <
typename Number, std::
size_t w
idth>
5024template <
typename Number, std::
size_t w
idth>
5038template <
typename Number, std::
size_t w
idth>
5052template <
typename Number, std::
size_t w
idth>
5067template <
typename Number, std::
size_t w
idth>
5083template <std::
size_t w
idth>
5097template <
typename Number, std::
size_t w
idth>
5112template <std::
size_t w
idth>
5125template <
typename Number, std::
size_t w
idth>
5141template <std::
size_t w
idth>
5155template <
typename Number, std::
size_t w
idth>
5171template <std::
size_t w
idth>
5185template <
typename Number, std::
size_t w
idth>
5201template <std::
size_t w
idth>
5215template <
typename Number, std::
size_t w
idth>
5230template <std::
size_t w
idth>
5243template <
typename Number, std::
size_t w
idth>
5259template <std::
size_t w
idth>
5273template <
typename Number, std::
size_t w
idth>
5289template <std::
size_t w
idth>
5302template <
typename Number, std::
size_t w
idth>
5314template <
typename Number, std::
size_t w
idth>
5328template <
typename Number, std::
size_t w
idth>
5329inline std::ostream &
5333 for (
unsigned int i = 0; i < n - 1; ++i)
5356#if DEAL_II_VECTORIZATION_WIDTH_IN_BITS >= 256 && defined(__AVX__)
5437template <SIMDComparison predicate,
typename Number>
5440 const Number &right,
5448 mask = (left == right);
5451 mask = (left != right);
5454 mask = (left < right);
5457 mask = (left <= right);
5460 mask = (left > right);
5463 mask = (left >= right);
5475template <SIMDComparison predicate,
typename Number>
5493# if DEAL_II_VECTORIZATION_WIDTH_IN_BITS >= 512 && defined(__AVX512F__)
5495template <SIMDComparison predicate>
5511template <SIMDComparison predicate>
5527# if DEAL_II_VECTORIZATION_WIDTH_IN_BITS >= 256 && defined(__AVX__)
5529template <SIMDComparison predicate>
5545template <SIMDComparison predicate>
5562# if DEAL_II_VECTORIZATION_WIDTH_IN_BITS >= 128 && defined(__SSE2__)
5564template <SIMDComparison predicate>
5602template <SIMDComparison predicate>
5645 template <
typename T>
5656 static constexpr std::size_t
5673 static constexpr std::size_t
5727 template <
typename T, std::
size_t w
idth_>
5738 static constexpr std::size_t
5756 static constexpr std::size_t
5829 template <
typename Number, std::
size_t w
idth>
5843 out.load(&values[0]);
5856 template <
typename Number, std::
size_t w
idth>
5865 out.load(&values[0]);
5878 template <
typename Number, std::
size_t w
idth>
5887 out.load(&values[0]);
5900 template <
typename Number, std::
size_t w
idth>
5909 out.load(&values[0]);
5922 template <
typename Number, std::
size_t w
idth>
5931 out.load(&values[0]);
5944 template <
typename Number, std::
size_t w
idth>
5948 return x.get_sqrt();
5960 template <
typename Number, std::
size_t w
idth>
5969 out.load(&values[0]);
5983 template <
typename Number, std::
size_t w
idth>
5993 out.load(&values[0]);
6006 template <
typename Number, std::
size_t w
idth>
6022 template <
typename Number, std::
size_t w
idth>
6027 return x.get_max(
y);
6039 template <
typename Number, std::
size_t w
idth>
6044 return x.get_min(
y);
6055#ifdef DEAL_II_HAVE_CXX20
value_type * data() const noexcept
VectorizedArrayBase()=default
VectorizedArrayIterator< const T > begin() const
VectorizedArrayIterator< const T > end() const
static constexpr std::size_t size()
VectorizedArrayBase(const std::initializer_list< U > &list)
VectorizedArrayIterator< T > end()
VectorizedArrayIterator< T > begin()
VectorizedArrayIterator< T > & operator+=(const std::size_t offset)
VectorizedArrayIterator< T > & operator=(const VectorizedArrayIterator< T > &other)=default
VectorizedArrayIterator< T > & operator--()
VectorizedArrayIterator< T > & operator++()
std::enable_if_t<!std::is_same< U, const U >::value, typename T::value_type > & operator*()
std::ptrdiff_t operator-(const VectorizedArrayIterator< T > &other) const
bool operator==(const VectorizedArrayIterator< T > &other) const
VectorizedArrayIterator(T &data, const std::size_t lane)
const T::value_type & operator*() const
bool operator!=(const VectorizedArrayIterator< T > &other) const
VectorizedArrayIterator< T > operator+(const std::size_t &offset) const
VectorizedArray< Number, width > operator-(const VectorizedArray< Number, width > &u)
VectorizedArray & operator=(const Number scalar) &
VectorizedArray< float, width > operator+(const VectorizedArray< float, width > &v, const double u)
VectorizedArray & operator/=(const VectorizedArray &vec)
void gather(const Number *base_ptr, const unsigned int *offsets)
void vectorized_load_and_transpose(const unsigned int n_entries, const Number *in, const unsigned int *offsets, VectorizedArray< Number, width > *out)
VectorizedArray< Number, width > operator+(const VectorizedArray< Number, width > &v, const Number &u)
VectorizedArrayType make_vectorized_array(const typename VectorizedArrayType::value_type &u)
VectorizedArray< Number, width > operator/(const VectorizedArray< Number, width > &v, const Number &u)
VectorizedArray get_abs() const
VectorizedArray< float, width > operator/(const VectorizedArray< float, width > &v, const double u)
VectorizedArray< Number, width > abs(const ::VectorizedArray< Number, width > &x)
VectorizedArray< Number, width > max(const ::VectorizedArray< Number, width > &x, const ::VectorizedArray< Number, width > &y)
VectorizedArray< Number, width > log(const ::VectorizedArray< Number, width > &x)
VectorizedArray< Number, width > operator*(const VectorizedArray< Number, width > &v, const Number &u)
VectorizedArray< Number, width > exp(const ::VectorizedArray< Number, width > &x)
VectorizedArray< Number, width > operator-(const VectorizedArray< Number, width > &v, const Number &u)
Number & operator[](const unsigned int comp)
VectorizedArray< float, width > operator-(const double u, const VectorizedArray< float, width > &v)
VectorizedArray< Number, width > operator+(const Number &u, const VectorizedArray< Number, width > &v)
VectorizedArray< Number, width > operator+(const VectorizedArray< Number, width > &u)
VectorizedArray()=default
bool operator==(const VectorizedArray< Number, width > &lhs, const VectorizedArray< Number, width > &rhs)
VectorizedArray< Number, width > tan(const ::VectorizedArray< Number, width > &x)
VectorizedArray(const Number scalar)
VectorizedArray< Number, width > operator-(const VectorizedArray< Number, width > &u, const VectorizedArray< Number, width > &v)
VectorizedArray< float, width > operator*(const VectorizedArray< float, width > &v, const double u)
VectorizedArray & operator*=(const VectorizedArray &vec)
VectorizedArray get_max(const VectorizedArray &other) const
const Number & operator[](const unsigned int comp) const
VectorizedArray< Number, width > min(const ::VectorizedArray< Number, width > &x, const ::VectorizedArray< Number, width > &y)
VectorizedArray get_min(const VectorizedArray &other) const
VectorizedArray< Number, width > pow(const ::VectorizedArray< Number, width > &x, const Number p)
VectorizedArray< Number, width > pow(const ::VectorizedArray< Number, width > &x, const ::VectorizedArray< Number, width > &p)
VectorizedArray< Number, width > sqrt(const ::VectorizedArray< Number, width > &x)
void store(OtherNumber *ptr) const
VectorizedArray< float, width > operator-(const VectorizedArray< float, width > &v, const double u)
void load(const OtherNumber *ptr)
void scatter(const unsigned int *offsets, Number *base_ptr) const
VectorizedArray< Number, width > operator-(const Number &u, const VectorizedArray< Number, width > &v)
VectorizedArray & operator=(const Number scalar) &&=delete
VectorizedArray< Number, width > operator*(const VectorizedArray< Number, width > &u, const VectorizedArray< Number, width > &v)
VectorizedArray & operator-=(const VectorizedArray &vec)
VectorizedArray< float, width > operator+(const double u, const VectorizedArray< float, width > &v)
VectorizedArray< Number, width > operator*(const Number &u, const VectorizedArray< Number, width > &v)
VectorizedArray get_sqrt() const
VectorizedArray< Number, width > operator/(const Number &u, const VectorizedArray< Number, width > &v)
VectorizedArray & operator+=(const VectorizedArray &vec)
VectorizedArray< Number, width > make_vectorized_array(const Number &u)
VectorizedArray< Number, width > operator/(const VectorizedArray< Number, width > &u, const VectorizedArray< Number, width > &v)
VectorizedArray< Number, width > operator+(const VectorizedArray< Number, width > &u, const VectorizedArray< Number, width > &v)
VectorizedArray< Number, width > cos(const ::VectorizedArray< Number, width > &x)
VectorizedArray< Number, width > sin(const ::VectorizedArray< Number, width > &x)
void streaming_store(Number *ptr) const
VectorizedArray(const std::initializer_list< U > &list)
void vectorized_transpose_and_store(const bool add_into, const unsigned int n_entries, const VectorizedArray< Number, width > *in, const unsigned int *offsets, Number *out)
VectorizedArray< float, width > operator/(const double u, const VectorizedArray< float, width > &v)
VectorizedArray< float, width > operator*(const double u, const VectorizedArray< float, width > &v)
#define DEAL_II_ALWAYS_INLINE
#define DEAL_II_OPENMP_SIMD_PRAGMA
#define DEAL_II_NAMESPACE_OPEN
#define DEAL_II_NAMESPACE_CLOSE
__global__ void vec_add(Number *val, const Number a, const size_type N)
#define Assert(cond, exc)
#define AssertIndexRange(index, range)
static ::ExceptionBase & ExcMessage(std::string arg1)
::VectorizedArray< Number, width > log(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > exp(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > tan(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > min(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > max(const ::VectorizedArray< Number, width > &, const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > cos(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > sin(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > sqrt(const ::VectorizedArray< Number, width > &)
::VectorizedArray< Number, width > pow(const ::VectorizedArray< Number, width > &, const Number p)
::VectorizedArray< Number, width > abs(const ::VectorizedArray< Number, width > &)
static value_type & get(vectorized_value_type &values, unsigned int c)
static constexpr std::size_t stride()
static vectorized_value_type & get_from_vectorized(vectorized_value_type &values, unsigned int c)
static constexpr std::size_t width()
static const value_type & get(const vectorized_value_type &values, unsigned int c)
static const vectorized_value_type & get_from_vectorized(const vectorized_value_type &values, unsigned int c)
static constexpr std::size_t width()
static constexpr std::size_t stride()
static const value_type & get(const value_type &value, unsigned int c)
VectorizedArray< T > vectorized_value_type
static const value_type & get_from_vectorized(const vectorized_value_type &values, unsigned int c)
static value_type & get_from_vectorized(vectorized_value_type &values, unsigned int c)
static value_type & get(value_type &value, unsigned int c)
typename T::value_type value_type
std::ptrdiff_t difference_type
void gather(VectorizedArray< Number, width > &out, const std::array< Number *, width > &ptrs, const unsigned int offset)
void vectorized_load_and_transpose(const unsigned int n_entries, const Number *in, const unsigned int *offsets, VectorizedArray< Number, width > *out)
std::ostream & operator<<(std::ostream &out, const VectorizedArray< Number, width > &p)
Number compare_and_apply_mask(const Number &left, const Number &right, const Number &true_value, const Number &false_value)
void vectorized_transpose_and_store(const bool add_into, const unsigned int n_entries, const VectorizedArray< Number, width > *in, const unsigned int *offsets, Number *out)