29#ifndef INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
30#define INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
32#include "mdds/global.hpp"
33#include "../types.hpp"
35namespace mdds {
namespace mtv {
namespace aos {
namespace detail {
37template<
typename Blks, lu_factor_t F>
40 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta)
const
43 mdds::detail::invalid_static_int<F>,
"The loop-unrolling factor must be one of 0, 4, 8, 16, or 32.");
47template<
typename Blks>
50 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta)
const
52 int64_t n = blocks.size();
54 if (start_block_index >= n)
58#pragma omp parallel for
60 for (int64_t i = start_block_index; i < n; ++i)
61 blocks[i].position += delta;
65template<
typename Blks>
68 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta)
const
70 int64_t n = blocks.size();
72 if (start_block_index >= n)
76 int64_t len = n - start_block_index;
77 int64_t rem = len & 3;
79 len += start_block_index;
81#pragma omp parallel for
83 for (int64_t i = start_block_index; i < len; i += 4)
85 blocks[i].position += delta;
86 blocks[i + 1].position += delta;
87 blocks[i + 2].position += delta;
88 blocks[i + 3].position += delta;
92 for (int64_t i = len; i < rem; ++i)
93 blocks[i].position += delta;
97template<
typename Blks>
100 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta)
const
102 int64_t n = blocks.size();
104 if (start_block_index >= n)
108 int64_t len = n - start_block_index;
109 int64_t rem = len & 7;
111 len += start_block_index;
113#pragma omp parallel for
115 for (int64_t i = start_block_index; i < len; i += 8)
117 blocks[i].position += delta;
118 blocks[i + 1].position += delta;
119 blocks[i + 2].position += delta;
120 blocks[i + 3].position += delta;
121 blocks[i + 4].position += delta;
122 blocks[i + 5].position += delta;
123 blocks[i + 6].position += delta;
124 blocks[i + 7].position += delta;
128 for (int64_t i = len; i < rem; ++i)
129 blocks[i].position += delta;
133template<
typename Blks>
136 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta)
const
138 int64_t n = blocks.size();
140 if (start_block_index >= n)
144 int64_t len = n - start_block_index;
145 int64_t rem = len & 15;
147 len += start_block_index;
149#pragma omp parallel for
151 for (int64_t i = start_block_index; i < len; i += 16)
153 blocks[i].position += delta;
154 blocks[i + 1].position += delta;
155 blocks[i + 2].position += delta;
156 blocks[i + 3].position += delta;
157 blocks[i + 4].position += delta;
158 blocks[i + 5].position += delta;
159 blocks[i + 6].position += delta;
160 blocks[i + 7].position += delta;
161 blocks[i + 8].position += delta;
162 blocks[i + 9].position += delta;
163 blocks[i + 10].position += delta;
164 blocks[i + 11].position += delta;
165 blocks[i + 12].position += delta;
166 blocks[i + 13].position += delta;
167 blocks[i + 14].position += delta;
168 blocks[i + 15].position += delta;
172 for (int64_t i = len; i < rem; ++i)
173 blocks[i].position += delta;
177template<
typename Blks>
180 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta)
const
182 int64_t n = blocks.size();
184 if (start_block_index >= n)
188 int64_t len = n - start_block_index;
189 int64_t rem = len & 31;
191 len += start_block_index;
193#pragma omp parallel for
195 for (int64_t i = start_block_index; i < len; i += 32)
197 blocks[i].position += delta;
198 blocks[i + 1].position += delta;
199 blocks[i + 2].position += delta;
200 blocks[i + 3].position += delta;
201 blocks[i + 4].position += delta;
202 blocks[i + 5].position += delta;
203 blocks[i + 6].position += delta;
204 blocks[i + 7].position += delta;
205 blocks[i + 8].position += delta;
206 blocks[i + 9].position += delta;
207 blocks[i + 10].position += delta;
208 blocks[i + 11].position += delta;
209 blocks[i + 12].position += delta;
210 blocks[i + 13].position += delta;
211 blocks[i + 14].position += delta;
212 blocks[i + 15].position += delta;
213 blocks[i + 16].position += delta;
214 blocks[i + 17].position += delta;
215 blocks[i + 18].position += delta;
216 blocks[i + 19].position += delta;
217 blocks[i + 20].position += delta;
218 blocks[i + 21].position += delta;
219 blocks[i + 22].position += delta;
220 blocks[i + 23].position += delta;
221 blocks[i + 24].position += delta;
222 blocks[i + 25].position += delta;
223 blocks[i + 26].position += delta;
224 blocks[i + 27].position += delta;
225 blocks[i + 28].position += delta;
226 blocks[i + 29].position += delta;
227 blocks[i + 30].position += delta;
228 blocks[i + 31].position += delta;
232 for (int64_t i = len; i < rem; ++i)
233 blocks[i].position += delta;
// Definition: aos/block_util.hpp:39