LLVM OpenMP* Runtime Library
kmp_sched.cpp
/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

  NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
        it may change values between parallel regions.  __kmp_max_nth
        is the largest value __kmp_nth may take, 1 is the smallest. */
18
19#include "kmp.h"
20#include "kmp_error.h"
21#include "kmp_i18n.h"
22#include "kmp_itt.h"
23#include "kmp_stats.h"
24#include "kmp_str.h"
25
26#if OMPT_SUPPORT
27#include "ompt-specific.h"
28#endif
29
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// printf-style conversion specifiers used when building debug trace format
// strings for each supported loop index type ( d, u, lld, llu, ld ).
// Only defined in debug builds; the trace blocks below splice these into the
// format via __kmp_str_format().
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
40
#if KMP_STATS_ENABLED
// Record, under counter `stat`, the number of iterations described by the
// bounds at the expansion site, then pop the current partitioned timer.
//
// The macro is not self-contained: it reads `*pupper`, `*plower` and `incr`
// from the enclosing function, so it may only be expanded where those names
// are in scope. The count is computed in kmp_int64 with the same four-way
// split on the increment that the scheduling routines use.
// Wrapped in do { } while (0) so the expansion behaves as a single statement.
#define KMP_STATS_LOOP_END(stat)                                               \
  do {                                                                         \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  } while (0)
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
63
64static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
65static inline void check_loc(ident_t *&loc) {
66 if (loc == NULL)
67 loc = &loc_stub; // may need to report location info to ittnotify
68}
69
70template <typename T>
71static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
72 kmp_int32 schedtype, kmp_int32 *plastiter,
73 T *plower, T *pupper,
74 typename traits_t<T>::signed_t *pstride,
75 typename traits_t<T>::signed_t incr,
76 typename traits_t<T>::signed_t chunk
77#if OMPT_SUPPORT && OMPT_OPTIONAL
78 ,
79 void *codeptr
80#endif
81) {
82 KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
83 KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
84 KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);
85
86 typedef typename traits_t<T>::unsigned_t UT;
87 typedef typename traits_t<T>::signed_t ST;
88 /* this all has to be changed back to TID and such.. */
89 kmp_int32 gtid = global_tid;
90 kmp_uint32 tid;
91 kmp_uint32 nth;
92 UT trip_count;
93 kmp_team_t *team;
94 __kmp_assert_valid_gtid(gtid);
95 kmp_info_t *th = __kmp_threads[gtid];
96
97#if OMPT_SUPPORT && OMPT_OPTIONAL
98 ompt_team_info_t *team_info = NULL;
99 ompt_task_info_t *task_info = NULL;
100 ompt_work_t ompt_work_type = ompt_work_loop;
101
102 static kmp_int8 warn = 0;
103
104 if (ompt_enabled.ompt_callback_work) {
105 // Only fully initialize variables needed by OMPT if OMPT is enabled.
106 team_info = __ompt_get_teaminfo(0, NULL);
107 task_info = __ompt_get_task_info_object(0);
108 // Determine workshare type
109 if (loc != NULL) {
110 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
111 ompt_work_type = ompt_work_loop;
112 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
113 ompt_work_type = ompt_work_sections;
114 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
115 ompt_work_type = ompt_work_distribute;
116 } else {
117 kmp_int8 bool_res =
118 KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
119 if (bool_res)
120 KMP_WARNING(OmptOutdatedWorkshare);
121 }
122 KMP_DEBUG_ASSERT(ompt_work_type);
123 }
124 }
125#endif
126
127 KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
128 KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
129#ifdef KMP_DEBUG
130 {
131 char *buff;
132 // create format specifiers before the debug output
133 buff = __kmp_str_format(
134 "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
135 " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
136 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
137 traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
138 KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
139 *pstride, incr, chunk));
140 __kmp_str_free(&buff);
141 }
142#endif
143
144 if (__kmp_env_consistency_check) {
145 __kmp_push_workshare(global_tid, ct_pdo, loc);
146 if (incr == 0) {
147 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
148 loc);
149 }
150 }
151 /* special handling for zero-trip loops */
152 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
153 if (plastiter != NULL)
154 *plastiter = FALSE;
155 /* leave pupper and plower set to entire iteration space */
156 *pstride = incr; /* value should never be used */
157// *plower = *pupper - incr;
158// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
159// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
160// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
161#ifdef KMP_DEBUG
162 {
163 char *buff;
164 // create format specifiers before the debug output
165 buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
166 "lower=%%%s upper=%%%s stride = %%%s "
167 "signed?<%s>, loc = %%s\n",
168 traits_t<T>::spec, traits_t<T>::spec,
169 traits_t<ST>::spec, traits_t<T>::spec);
170 check_loc(loc);
171 KD_TRACE(100,
172 (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
173 __kmp_str_free(&buff);
174 }
175#endif
176 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
177
178#if OMPT_SUPPORT && OMPT_OPTIONAL
179 if (ompt_enabled.ompt_callback_work) {
180 ompt_callbacks.ompt_callback(ompt_callback_work)(
181 ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
182 &(task_info->task_data), 0, codeptr);
183 }
184#endif
185 KMP_STATS_LOOP_END(OMP_loop_static_iterations);
186 return;
187 }
188
189 // Although there are schedule enumerations above kmp_ord_upper which are not
190 // schedules for "distribute", the only ones which are useful are dynamic, so
191 // cannot be seen here, since this codepath is only executed for static
192 // schedules.
193 if (schedtype > kmp_ord_upper) {
194 // we are in DISTRIBUTE construct
195 schedtype += kmp_sch_static -
196 kmp_distribute_static; // AC: convert to usual schedule type
197 tid = th->th.th_team->t.t_master_tid;
198 team = th->th.th_team->t.t_parent;
199 } else {
200 tid = __kmp_tid_from_gtid(global_tid);
201 team = th->th.th_team;
202 }
203
204 /* determine if "for" loop is an active worksharing construct */
205 if (team->t.t_serialized) {
206 /* serialized parallel, each thread executes whole iteration space */
207 if (plastiter != NULL)
208 *plastiter = TRUE;
209 /* leave pupper and plower set to entire iteration space */
210 *pstride =
211 (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
212
213#ifdef KMP_DEBUG
214 {
215 char *buff;
216 // create format specifiers before the debug output
217 buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
218 "lower=%%%s upper=%%%s stride = %%%s\n",
219 traits_t<T>::spec, traits_t<T>::spec,
220 traits_t<ST>::spec);
221 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
222 __kmp_str_free(&buff);
223 }
224#endif
225 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
226
227#if OMPT_SUPPORT && OMPT_OPTIONAL
228 if (ompt_enabled.ompt_callback_work) {
229 ompt_callbacks.ompt_callback(ompt_callback_work)(
230 ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
231 &(task_info->task_data), *pstride, codeptr);
232 }
233#endif
234 KMP_STATS_LOOP_END(OMP_loop_static_iterations);
235 return;
236 }
237 nth = team->t.t_nproc;
238 if (nth == 1) {
239 if (plastiter != NULL)
240 *plastiter = TRUE;
241 *pstride =
242 (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
243#ifdef KMP_DEBUG
244 {
245 char *buff;
246 // create format specifiers before the debug output
247 buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
248 "lower=%%%s upper=%%%s stride = %%%s\n",
249 traits_t<T>::spec, traits_t<T>::spec,
250 traits_t<ST>::spec);
251 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
252 __kmp_str_free(&buff);
253 }
254#endif
255 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
256
257#if OMPT_SUPPORT && OMPT_OPTIONAL
258 if (ompt_enabled.ompt_callback_work) {
259 ompt_callbacks.ompt_callback(ompt_callback_work)(
260 ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
261 &(task_info->task_data), *pstride, codeptr);
262 }
263#endif
264 KMP_STATS_LOOP_END(OMP_loop_static_iterations);
265 return;
266 }
267
268 /* compute trip count */
269 if (incr == 1) {
270 trip_count = *pupper - *plower + 1;
271 } else if (incr == -1) {
272 trip_count = *plower - *pupper + 1;
273 } else if (incr > 0) {
274 // upper-lower can exceed the limit of signed type
275 trip_count = (UT)(*pupper - *plower) / incr + 1;
276 } else {
277 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
278 }
279
280#if KMP_STATS_ENABLED
281 if (KMP_MASTER_GTID(gtid)) {
282 KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
283 }
284#endif
285
286 if (__kmp_env_consistency_check) {
287 /* tripcount overflow? */
288 if (trip_count == 0 && *pupper != *plower) {
289 __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
290 loc);
291 }
292 }
293
294 /* compute remaining parameters */
295 switch (schedtype) {
296 case kmp_sch_static: {
297 if (trip_count < nth) {
298 KMP_DEBUG_ASSERT(
299 __kmp_static == kmp_sch_static_greedy ||
300 __kmp_static ==
301 kmp_sch_static_balanced); // Unknown static scheduling type.
302 if (tid < trip_count) {
303 *pupper = *plower = *plower + tid * incr;
304 } else {
305 // set bounds so non-active threads execute no iterations
306 *plower = *pupper + (incr > 0 ? 1 : -1);
307 }
308 if (plastiter != NULL)
309 *plastiter = (tid == trip_count - 1);
310 } else {
311 if (__kmp_static == kmp_sch_static_balanced) {
312 UT small_chunk = trip_count / nth;
313 UT extras = trip_count % nth;
314 *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
315 *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
316 if (plastiter != NULL)
317 *plastiter = (tid == nth - 1);
318 } else {
319 T big_chunk_inc_count =
320 (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
321 T old_upper = *pupper;
322
323 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
324 // Unknown static scheduling type.
325
326 *plower += tid * big_chunk_inc_count;
327 *pupper = *plower + big_chunk_inc_count - incr;
328 if (incr > 0) {
329 if (*pupper < *plower)
330 *pupper = traits_t<T>::max_value;
331 if (plastiter != NULL)
332 *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
333 if (*pupper > old_upper)
334 *pupper = old_upper; // tracker C73258
335 } else {
336 if (*pupper > *plower)
337 *pupper = traits_t<T>::min_value;
338 if (plastiter != NULL)
339 *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
340 if (*pupper < old_upper)
341 *pupper = old_upper; // tracker C73258
342 }
343 }
344 }
345 *pstride = trip_count;
346 break;
347 }
348 case kmp_sch_static_chunked: {
349 ST span;
350 UT nchunks;
351 if (chunk < 1)
352 chunk = 1;
353 else if ((UT)chunk > trip_count)
354 chunk = trip_count;
355 nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
356 span = chunk * incr;
357 if (nchunks < nth) {
358 *pstride = span * nchunks;
359 if (tid < nchunks) {
360 *plower = *plower + (span * tid);
361 *pupper = *plower + span - incr;
362 } else {
363 *plower = *pupper + (incr > 0 ? 1 : -1);
364 }
365 } else {
366 *pstride = span * nth;
367 *plower = *plower + (span * tid);
368 *pupper = *plower + span - incr;
369 }
370 if (plastiter != NULL)
371 *plastiter = (tid == (nchunks - 1) % nth);
372 break;
373 }
374 case kmp_sch_static_balanced_chunked: {
375 T old_upper = *pupper;
376 // round up to make sure the chunk is enough to cover all iterations
377 UT span = (trip_count + nth - 1) / nth;
378
379 // perform chunk adjustment
380 chunk = (span + chunk - 1) & ~(chunk - 1);
381
382 span = chunk * incr;
383 *plower = *plower + (span * tid);
384 *pupper = *plower + span - incr;
385 if (incr > 0) {
386 if (*pupper > old_upper)
387 *pupper = old_upper;
388 } else if (*pupper < old_upper)
389 *pupper = old_upper;
390
391 if (plastiter != NULL)
392 *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
393 break;
394 }
395 default:
396 KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
397 break;
398 }
399
400#if USE_ITT_BUILD
401 // Report loop metadata
402 if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
403 __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
404 team->t.t_active_level == 1) {
405 kmp_uint64 cur_chunk = chunk;
406 check_loc(loc);
407 // Calculate chunk in case it was not specified; it is specified for
408 // kmp_sch_static_chunked
409 if (schedtype == kmp_sch_static) {
410 cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
411 }
412 // 0 - "static" schedule
413 __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
414 }
415#endif
416#ifdef KMP_DEBUG
417 {
418 char *buff;
419 // create format specifiers before the debug output
420 buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
421 "upper=%%%s stride = %%%s signed?<%s>\n",
422 traits_t<T>::spec, traits_t<T>::spec,
423 traits_t<ST>::spec, traits_t<T>::spec);
424 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
425 __kmp_str_free(&buff);
426 }
427#endif
428 KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));
429
430#if OMPT_SUPPORT && OMPT_OPTIONAL
431 if (ompt_enabled.ompt_callback_work) {
432 ompt_callbacks.ompt_callback(ompt_callback_work)(
433 ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
434 &(task_info->task_data), trip_count, codeptr);
435 }
436#endif
437
438 KMP_STATS_LOOP_END(OMP_loop_static_iterations);
439 return;
440}
441
442template <typename T>
443static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
444 kmp_int32 schedule, kmp_int32 *plastiter,
445 T *plower, T *pupper, T *pupperDist,
446 typename traits_t<T>::signed_t *pstride,
447 typename traits_t<T>::signed_t incr,
448 typename traits_t<T>::signed_t chunk) {
449 KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
450 KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
451 KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
452 typedef typename traits_t<T>::unsigned_t UT;
453 typedef typename traits_t<T>::signed_t ST;
454 kmp_uint32 tid;
455 kmp_uint32 nth;
456 kmp_uint32 team_id;
457 kmp_uint32 nteams;
458 UT trip_count;
459 kmp_team_t *team;
460 kmp_info_t *th;
461
462 KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
463 KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
464 __kmp_assert_valid_gtid(gtid);
465#ifdef KMP_DEBUG
466 {
467 char *buff;
468 // create format specifiers before the debug output
469 buff = __kmp_str_format(
470 "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
471 "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
472 traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
473 traits_t<ST>::spec, traits_t<T>::spec);
474 KD_TRACE(100,
475 (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
476 __kmp_str_free(&buff);
477 }
478#endif
479
480 if (__kmp_env_consistency_check) {
481 __kmp_push_workshare(gtid, ct_pdo, loc);
482 if (incr == 0) {
483 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
484 loc);
485 }
486 if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
487 // The loop is illegal.
488 // Some zero-trip loops maintained by compiler, e.g.:
489 // for(i=10;i<0;++i) // lower >= upper - run-time check
490 // for(i=0;i>10;--i) // lower <= upper - run-time check
491 // for(i=0;i>10;++i) // incr > 0 - compile-time check
492 // for(i=10;i<0;--i) // incr < 0 - compile-time check
493 // Compiler does not check the following illegal loops:
494 // for(i=0;i<10;i+=incr) // where incr<0
495 // for(i=10;i>0;i-=incr) // where incr<0
496 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
497 }
498 }
499 tid = __kmp_tid_from_gtid(gtid);
500 th = __kmp_threads[gtid];
501 nth = th->th.th_team_nproc;
502 team = th->th.th_team;
503 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
504 nteams = th->th.th_teams_size.nteams;
505 team_id = team->t.t_master_tid;
506 KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
507
508 // compute global trip count
509 if (incr == 1) {
510 trip_count = *pupper - *plower + 1;
511 } else if (incr == -1) {
512 trip_count = *plower - *pupper + 1;
513 } else if (incr > 0) {
514 // upper-lower can exceed the limit of signed type
515 trip_count = (UT)(*pupper - *plower) / incr + 1;
516 } else {
517 trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
518 }
519
520 *pstride = *pupper - *plower; // just in case (can be unused)
521 if (trip_count <= nteams) {
522 KMP_DEBUG_ASSERT(
523 __kmp_static == kmp_sch_static_greedy ||
524 __kmp_static ==
525 kmp_sch_static_balanced); // Unknown static scheduling type.
526 // only primary threads of some teams get single iteration, other threads
527 // get nothing
528 if (team_id < trip_count && tid == 0) {
529 *pupper = *pupperDist = *plower = *plower + team_id * incr;
530 } else {
531 *pupperDist = *pupper;
532 *plower = *pupper + incr; // compiler should skip loop body
533 }
534 if (plastiter != NULL)
535 *plastiter = (tid == 0 && team_id == trip_count - 1);
536 } else {
537 // Get the team's chunk first (each team gets at most one chunk)
538 if (__kmp_static == kmp_sch_static_balanced) {
539 UT chunkD = trip_count / nteams;
540 UT extras = trip_count % nteams;
541 *plower +=
542 incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
543 *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
544 if (plastiter != NULL)
545 *plastiter = (team_id == nteams - 1);
546 } else {
547 T chunk_inc_count =
548 (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
549 T upper = *pupper;
550 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
551 // Unknown static scheduling type.
552 *plower += team_id * chunk_inc_count;
553 *pupperDist = *plower + chunk_inc_count - incr;
554 // Check/correct bounds if needed
555 if (incr > 0) {
556 if (*pupperDist < *plower)
557 *pupperDist = traits_t<T>::max_value;
558 if (plastiter != NULL)
559 *plastiter = *plower <= upper && *pupperDist > upper - incr;
560 if (*pupperDist > upper)
561 *pupperDist = upper; // tracker C73258
562 if (*plower > *pupperDist) {
563 *pupper = *pupperDist; // no iterations available for the team
564 goto end;
565 }
566 } else {
567 if (*pupperDist > *plower)
568 *pupperDist = traits_t<T>::min_value;
569 if (plastiter != NULL)
570 *plastiter = *plower >= upper && *pupperDist < upper - incr;
571 if (*pupperDist < upper)
572 *pupperDist = upper; // tracker C73258
573 if (*plower < *pupperDist) {
574 *pupper = *pupperDist; // no iterations available for the team
575 goto end;
576 }
577 }
578 }
579 // Get the parallel loop chunk now (for thread)
580 // compute trip count for team's chunk
581 if (incr == 1) {
582 trip_count = *pupperDist - *plower + 1;
583 } else if (incr == -1) {
584 trip_count = *plower - *pupperDist + 1;
585 } else if (incr > 1) {
586 // upper-lower can exceed the limit of signed type
587 trip_count = (UT)(*pupperDist - *plower) / incr + 1;
588 } else {
589 trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
590 }
591 KMP_DEBUG_ASSERT(trip_count);
592 switch (schedule) {
593 case kmp_sch_static: {
594 if (trip_count <= nth) {
595 KMP_DEBUG_ASSERT(
596 __kmp_static == kmp_sch_static_greedy ||
597 __kmp_static ==
598 kmp_sch_static_balanced); // Unknown static scheduling type.
599 if (tid < trip_count)
600 *pupper = *plower = *plower + tid * incr;
601 else
602 *plower = *pupper + incr; // no iterations available
603 if (plastiter != NULL)
604 if (*plastiter != 0 && !(tid == trip_count - 1))
605 *plastiter = 0;
606 } else {
607 if (__kmp_static == kmp_sch_static_balanced) {
608 UT chunkL = trip_count / nth;
609 UT extras = trip_count % nth;
610 *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
611 *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
612 if (plastiter != NULL)
613 if (*plastiter != 0 && !(tid == nth - 1))
614 *plastiter = 0;
615 } else {
616 T chunk_inc_count =
617 (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
618 T upper = *pupperDist;
619 KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
620 // Unknown static scheduling type.
621 *plower += tid * chunk_inc_count;
622 *pupper = *plower + chunk_inc_count - incr;
623 if (incr > 0) {
624 if (*pupper < *plower)
625 *pupper = traits_t<T>::max_value;
626 if (plastiter != NULL)
627 if (*plastiter != 0 &&
628 !(*plower <= upper && *pupper > upper - incr))
629 *plastiter = 0;
630 if (*pupper > upper)
631 *pupper = upper; // tracker C73258
632 } else {
633 if (*pupper > *plower)
634 *pupper = traits_t<T>::min_value;
635 if (plastiter != NULL)
636 if (*plastiter != 0 &&
637 !(*plower >= upper && *pupper < upper - incr))
638 *plastiter = 0;
639 if (*pupper < upper)
640 *pupper = upper; // tracker C73258
641 }
642 }
643 }
644 break;
645 }
646 case kmp_sch_static_chunked: {
647 ST span;
648 if (chunk < 1)
649 chunk = 1;
650 span = chunk * incr;
651 *pstride = span * nth;
652 *plower = *plower + (span * tid);
653 *pupper = *plower + span - incr;
654 if (plastiter != NULL)
655 if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
656 *plastiter = 0;
657 break;
658 }
659 default:
660 KMP_ASSERT2(0,
661 "__kmpc_dist_for_static_init: unknown loop scheduling type");
662 break;
663 }
664 }
665end:;
666#ifdef KMP_DEBUG
667 {
668 char *buff;
669 // create format specifiers before the debug output
670 buff = __kmp_str_format(
671 "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
672 "stride=%%%s signed?<%s>\n",
673 traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
674 traits_t<ST>::spec, traits_t<T>::spec);
675 KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
676 __kmp_str_free(&buff);
677 }
678#endif
679 KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
680 KMP_STATS_LOOP_END(OMP_distribute_iterations);
681 return;
682}
683
684template <typename T>
685static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
686 kmp_int32 *p_last, T *p_lb, T *p_ub,
687 typename traits_t<T>::signed_t *p_st,
688 typename traits_t<T>::signed_t incr,
689 typename traits_t<T>::signed_t chunk) {
690 // The routine returns the first chunk distributed to the team and
691 // stride for next chunks calculation.
692 // Last iteration flag set for the team that will execute
693 // the last iteration of the loop.
694 // The routine is called for dist_schedule(static,chunk) only.
695 typedef typename traits_t<T>::unsigned_t UT;
696 typedef typename traits_t<T>::signed_t ST;
697 kmp_uint32 team_id;
698 kmp_uint32 nteams;
699 UT trip_count;
700 T lower;
701 T upper;
702 ST span;
703 kmp_team_t *team;
704 kmp_info_t *th;
705
706 KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
707 KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
708 __kmp_assert_valid_gtid(gtid);
709#ifdef KMP_DEBUG
710 {
711 char *buff;
712 // create format specifiers before the debug output
713 buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
714 "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
715 traits_t<T>::spec, traits_t<T>::spec,
716 traits_t<ST>::spec, traits_t<ST>::spec,
717 traits_t<T>::spec);
718 KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
719 __kmp_str_free(&buff);
720 }
721#endif
722
723 lower = *p_lb;
724 upper = *p_ub;
725 if (__kmp_env_consistency_check) {
726 if (incr == 0) {
727 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
728 loc);
729 }
730 if (incr > 0 ? (upper < lower) : (lower < upper)) {
731 // The loop is illegal.
732 // Some zero-trip loops maintained by compiler, e.g.:
733 // for(i=10;i<0;++i) // lower >= upper - run-time check
734 // for(i=0;i>10;--i) // lower <= upper - run-time check
735 // for(i=0;i>10;++i) // incr > 0 - compile-time check
736 // for(i=10;i<0;--i) // incr < 0 - compile-time check
737 // Compiler does not check the following illegal loops:
738 // for(i=0;i<10;i+=incr) // where incr<0
739 // for(i=10;i>0;i-=incr) // where incr<0
740 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
741 }
742 }
743 th = __kmp_threads[gtid];
744 team = th->th.th_team;
745 KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
746 nteams = th->th.th_teams_size.nteams;
747 team_id = team->t.t_master_tid;
748 KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
749
750 // compute trip count
751 if (incr == 1) {
752 trip_count = upper - lower + 1;
753 } else if (incr == -1) {
754 trip_count = lower - upper + 1;
755 } else if (incr > 0) {
756 // upper-lower can exceed the limit of signed type
757 trip_count = (UT)(upper - lower) / incr + 1;
758 } else {
759 trip_count = (UT)(lower - upper) / (-incr) + 1;
760 }
761 if (chunk < 1)
762 chunk = 1;
763 span = chunk * incr;
764 *p_st = span * nteams;
765 *p_lb = lower + (span * team_id);
766 *p_ub = *p_lb + span - incr;
767 if (p_last != NULL)
768 *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
769 // Correct upper bound if needed
770 if (incr > 0) {
771 if (*p_ub < *p_lb) // overflow?
772 *p_ub = traits_t<T>::max_value;
773 if (*p_ub > upper)
774 *p_ub = upper; // tracker C73258
775 } else { // incr < 0
776 if (*p_ub > *p_lb)
777 *p_ub = traits_t<T>::min_value;
778 if (*p_ub < upper)
779 *p_ub = upper; // tracker C73258
780 }
781#ifdef KMP_DEBUG
782 {
783 char *buff;
784 // create format specifiers before the debug output
785 buff =
786 __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
787 "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
788 traits_t<T>::spec, traits_t<T>::spec,
789 traits_t<ST>::spec, traits_t<ST>::spec);
790 KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
791 __kmp_str_free(&buff);
792 }
793#endif
794}
795
796//------------------------------------------------------------------------------
797extern "C" {
819void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
820 kmp_int32 *plastiter, kmp_int32 *plower,
821 kmp_int32 *pupper, kmp_int32 *pstride,
822 kmp_int32 incr, kmp_int32 chunk) {
823 __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
824 pupper, pstride, incr, chunk
825#if OMPT_SUPPORT && OMPT_OPTIONAL
826 ,
827 OMPT_GET_RETURN_ADDRESS(0)
828#endif
829 );
830}
831
835void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
836 kmp_int32 schedtype, kmp_int32 *plastiter,
837 kmp_uint32 *plower, kmp_uint32 *pupper,
838 kmp_int32 *pstride, kmp_int32 incr,
839 kmp_int32 chunk) {
840 __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
841 pupper, pstride, incr, chunk
842#if OMPT_SUPPORT && OMPT_OPTIONAL
843 ,
844 OMPT_GET_RETURN_ADDRESS(0)
845#endif
846 );
847}
848
852void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
853 kmp_int32 *plastiter, kmp_int64 *plower,
854 kmp_int64 *pupper, kmp_int64 *pstride,
855 kmp_int64 incr, kmp_int64 chunk) {
856 __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
857 pupper, pstride, incr, chunk
858#if OMPT_SUPPORT && OMPT_OPTIONAL
859 ,
860 OMPT_GET_RETURN_ADDRESS(0)
861#endif
862 );
863}
864
868void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
869 kmp_int32 schedtype, kmp_int32 *plastiter,
870 kmp_uint64 *plower, kmp_uint64 *pupper,
871 kmp_int64 *pstride, kmp_int64 incr,
872 kmp_int64 chunk) {
873 __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
874 pupper, pstride, incr, chunk
875#if OMPT_SUPPORT && OMPT_OPTIONAL
876 ,
877 OMPT_GET_RETURN_ADDRESS(0)
878#endif
879 );
880}
907void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
908 kmp_int32 schedule, kmp_int32 *plastiter,
909 kmp_int32 *plower, kmp_int32 *pupper,
910 kmp_int32 *pupperD, kmp_int32 *pstride,
911 kmp_int32 incr, kmp_int32 chunk) {
912 __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
913 pupper, pupperD, pstride, incr, chunk);
914}
915
919void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
920 kmp_int32 schedule, kmp_int32 *plastiter,
921 kmp_uint32 *plower, kmp_uint32 *pupper,
922 kmp_uint32 *pupperD, kmp_int32 *pstride,
923 kmp_int32 incr, kmp_int32 chunk) {
924 __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
925 pupper, pupperD, pstride, incr, chunk);
926}
927
931void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
932 kmp_int32 schedule, kmp_int32 *plastiter,
933 kmp_int64 *plower, kmp_int64 *pupper,
934 kmp_int64 *pupperD, kmp_int64 *pstride,
935 kmp_int64 incr, kmp_int64 chunk) {
936 __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
937 pupper, pupperD, pstride, incr, chunk);
938}
939
943void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
944 kmp_int32 schedule, kmp_int32 *plastiter,
945 kmp_uint64 *plower, kmp_uint64 *pupper,
946 kmp_uint64 *pupperD, kmp_int64 *pstride,
947 kmp_int64 incr, kmp_int64 chunk) {
948 __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
949 pupper, pupperD, pstride, incr, chunk);
950}
955//------------------------------------------------------------------------------
956// Auxiliary routines for Distribute Parallel Loop construct implementation
957// Transfer call to template< type T >
958// __kmp_team_static_init( ident_t *loc, int gtid,
959// int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
960
981void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
982 kmp_int32 *p_lb, kmp_int32 *p_ub,
983 kmp_int32 *p_st, kmp_int32 incr,
984 kmp_int32 chunk) {
985 KMP_DEBUG_ASSERT(__kmp_init_serial);
986 __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
987 chunk);
988}
989
993void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
994 kmp_uint32 *p_lb, kmp_uint32 *p_ub,
995 kmp_int32 *p_st, kmp_int32 incr,
996 kmp_int32 chunk) {
997 KMP_DEBUG_ASSERT(__kmp_init_serial);
998 __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
999 chunk);
1000}
1001
1005void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1006 kmp_int64 *p_lb, kmp_int64 *p_ub,
1007 kmp_int64 *p_st, kmp_int64 incr,
1008 kmp_int64 chunk) {
1009 KMP_DEBUG_ASSERT(__kmp_init_serial);
1010 __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1011 chunk);
1012}
1013
1017void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
1018 kmp_uint64 *p_lb, kmp_uint64 *p_ub,
1019 kmp_int64 *p_st, kmp_int64 incr,
1020 kmp_int64 chunk) {
1021 KMP_DEBUG_ASSERT(__kmp_init_serial);
1022 __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
1023 chunk);
1024}
1029} // extern "C"
@ KMP_IDENT_KMPC
Definition: kmp.h:196
@ KMP_IDENT_WORK_LOOP
Definition: kmp.h:214
@ KMP_IDENT_WORK_SECTIONS
Definition: kmp.h:216
@ KMP_IDENT_WORK_DISTRIBUTE
Definition: kmp.h:218
#define KMP_COUNT_VALUE(name, value)
Adds value to specified timer (name).
Definition: kmp_stats.h:895
#define KMP_COUNT_BLOCK(name)
Increments specified counter (name).
Definition: kmp_stats.h:908
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:852
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:931
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_uint32 *plower, kmp_uint32 *pupper, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:835
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:1005
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:919
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:1017
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_uint64 *plower, kmp_uint64 *pupper, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:868
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:993
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:907
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:819
void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk)
Definition: kmp_sched.cpp:981
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD, kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk)
Definition: kmp_sched.cpp:943
@ kmp_sch_static
Definition: kmp.h:360
@ kmp_distribute_static
Definition: kmp.h:396
@ kmp_ord_upper
Definition: kmp.h:392
Definition: kmp.h:234
char const * psource
Definition: kmp.h:244
kmp_int32 flags
Definition: kmp.h:236