mdds
aos/block_util.hpp
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*************************************************************************
 *
 * Copyright (c) 2021 Kohei Yoshida
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 ************************************************************************/
28
29#ifndef INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
30#define INCLUDED_MDDS_MULTI_TYPE_VECTOR_DIR_AOS_BLOCK_UTIL_HPP
31
32#include "mdds/global.hpp"
33#include "../types.hpp"
34
35namespace mdds { namespace mtv { namespace aos { namespace detail {
36
37template<typename Blks, lu_factor_t F>
39{
40 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
41 {
42 static_assert(
43 mdds::detail::invalid_static_int<F>, "The loop-unrolling factor must be one of 0, 4, 8, 16, or 32.");
44 }
45};
46
47template<typename Blks>
48struct adjust_block_positions<Blks, lu_factor_t::none>
49{
50 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
51 {
52 int64_t n = blocks.size();
53
54 if (start_block_index >= n)
55 return;
56
57#if MDDS_USE_OPENMP
58#pragma omp parallel for
59#endif
60 for (int64_t i = start_block_index; i < n; ++i)
61 blocks[i].position += delta;
62 }
63};
64
65template<typename Blks>
66struct adjust_block_positions<Blks, lu_factor_t::lu4>
67{
68 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
69 {
70 int64_t n = blocks.size();
71
72 if (start_block_index >= n)
73 return;
74
75 // Ensure that the section length is divisible by 4.
76 int64_t len = n - start_block_index;
77 int64_t rem = len & 3; // % 4
78 len -= rem;
79 len += start_block_index;
80#if MDDS_USE_OPENMP
81#pragma omp parallel for
82#endif
83 for (int64_t i = start_block_index; i < len; i += 4)
84 {
85 blocks[i].position += delta;
86 blocks[i + 1].position += delta;
87 blocks[i + 2].position += delta;
88 blocks[i + 3].position += delta;
89 }
90
91 rem += len;
92 for (int64_t i = len; i < rem; ++i)
93 blocks[i].position += delta;
94 }
95};
96
97template<typename Blks>
98struct adjust_block_positions<Blks, lu_factor_t::lu8>
99{
100 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
101 {
102 int64_t n = blocks.size();
103
104 if (start_block_index >= n)
105 return;
106
107 // Ensure that the section length is divisible by 8.
108 int64_t len = n - start_block_index;
109 int64_t rem = len & 7; // % 8
110 len -= rem;
111 len += start_block_index;
112#if MDDS_USE_OPENMP
113#pragma omp parallel for
114#endif
115 for (int64_t i = start_block_index; i < len; i += 8)
116 {
117 blocks[i].position += delta;
118 blocks[i + 1].position += delta;
119 blocks[i + 2].position += delta;
120 blocks[i + 3].position += delta;
121 blocks[i + 4].position += delta;
122 blocks[i + 5].position += delta;
123 blocks[i + 6].position += delta;
124 blocks[i + 7].position += delta;
125 }
126
127 rem += len;
128 for (int64_t i = len; i < rem; ++i)
129 blocks[i].position += delta;
130 }
131};
132
133template<typename Blks>
134struct adjust_block_positions<Blks, lu_factor_t::lu16>
135{
136 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
137 {
138 int64_t n = blocks.size();
139
140 if (start_block_index >= n)
141 return;
142
143 // Ensure that the section length is divisible by 16.
144 int64_t len = n - start_block_index;
145 int64_t rem = len & 15; // % 16
146 len -= rem;
147 len += start_block_index;
148#if MDDS_USE_OPENMP
149#pragma omp parallel for
150#endif
151 for (int64_t i = start_block_index; i < len; i += 16)
152 {
153 blocks[i].position += delta;
154 blocks[i + 1].position += delta;
155 blocks[i + 2].position += delta;
156 blocks[i + 3].position += delta;
157 blocks[i + 4].position += delta;
158 blocks[i + 5].position += delta;
159 blocks[i + 6].position += delta;
160 blocks[i + 7].position += delta;
161 blocks[i + 8].position += delta;
162 blocks[i + 9].position += delta;
163 blocks[i + 10].position += delta;
164 blocks[i + 11].position += delta;
165 blocks[i + 12].position += delta;
166 blocks[i + 13].position += delta;
167 blocks[i + 14].position += delta;
168 blocks[i + 15].position += delta;
169 }
170
171 rem += len;
172 for (int64_t i = len; i < rem; ++i)
173 blocks[i].position += delta;
174 }
175};
176
177template<typename Blks>
178struct adjust_block_positions<Blks, lu_factor_t::lu32>
179{
180 void operator()(Blks& blocks, int64_t start_block_index, int64_t delta) const
181 {
182 int64_t n = blocks.size();
183
184 if (start_block_index >= n)
185 return;
186
187 // Ensure that the section length is divisible by 32.
188 int64_t len = n - start_block_index;
189 int64_t rem = len & 31; // % 32
190 len -= rem;
191 len += start_block_index;
192#if MDDS_USE_OPENMP
193#pragma omp parallel for
194#endif
195 for (int64_t i = start_block_index; i < len; i += 32)
196 {
197 blocks[i].position += delta;
198 blocks[i + 1].position += delta;
199 blocks[i + 2].position += delta;
200 blocks[i + 3].position += delta;
201 blocks[i + 4].position += delta;
202 blocks[i + 5].position += delta;
203 blocks[i + 6].position += delta;
204 blocks[i + 7].position += delta;
205 blocks[i + 8].position += delta;
206 blocks[i + 9].position += delta;
207 blocks[i + 10].position += delta;
208 blocks[i + 11].position += delta;
209 blocks[i + 12].position += delta;
210 blocks[i + 13].position += delta;
211 blocks[i + 14].position += delta;
212 blocks[i + 15].position += delta;
213 blocks[i + 16].position += delta;
214 blocks[i + 17].position += delta;
215 blocks[i + 18].position += delta;
216 blocks[i + 19].position += delta;
217 blocks[i + 20].position += delta;
218 blocks[i + 21].position += delta;
219 blocks[i + 22].position += delta;
220 blocks[i + 23].position += delta;
221 blocks[i + 24].position += delta;
222 blocks[i + 25].position += delta;
223 blocks[i + 26].position += delta;
224 blocks[i + 27].position += delta;
225 blocks[i + 28].position += delta;
226 blocks[i + 29].position += delta;
227 blocks[i + 30].position += delta;
228 blocks[i + 31].position += delta;
229 }
230
231 rem += len;
232 for (int64_t i = len; i < rem; ++i)
233 blocks[i].position += delta;
234 }
235};
236
237}}}} // namespace mdds::mtv::aos::detail
238
239#endif
240
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: aos/block_util.hpp:39