#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#include "ompt-specific.h"
#include "ompd-specific.h"

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif
#define KMP_USE_PRCTL 0

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
kmp_info_t __kmp_monitor;

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
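// Determine the calling thread's global thread id (gtid). Depending on
// __kmp_gtid_mode this uses thread-local data, keyed TLS, or an internal
// search over the registered threads' stack extents; the stack-based path may
// also refine the recorded stack base/size for the matching thread.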
int __kmp_get_global_thread_id() {
  kmp_info_t **other_threads;

  ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
   __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);

  ("*** __kmp_get_global_thread_id: internal alg. failed to find "
   "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);

  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
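// Variant of the gtid lookup used on registration paths: if the caller is an
// unknown root (KMP_GTID_DNE), take the bootstrap lock, run serial
// initialization if needed, and register the caller as a new root thread.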
int __kmp_get_global_thread_id_reg() {

  if (!__kmp_init_serial) {

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
    ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();

  if (gtid == KMP_GTID_DNE) {
    ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
     "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
      gtid = __kmp_register_root(FALSE);
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KMP_DEBUG_ASSERT(gtid >= 0);
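// Check a thread's stack against all other registered threads' stacks and
// issue a fatal StackOverlap message if the extents intersect; also prints
// the stack extent when storage mapping is enabled.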
void __kmp_check_stack_overlap(kmp_info_t *th) {
  char *stack_beg = NULL;
  char *stack_end = NULL;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");

  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),

  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
void __kmp_infinite_loop(void) {
  static int done = FALSE;

#define MAX_MESSAGE 512
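// Print one "OMP storage map" line for the address range [p1, p2) and, when
// KMP_PRINT_DATA_PLACEMENT is enabled, the host memory node of each page in
// the range.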
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
        __kmp_storage_map_verbose = FALSE;
      int localProc = __kmp_get_cpu_from_gtid(gtid);

      const int page_size = KMP_GET_PAGE_SIZE();

      p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
      p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
        __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
        __kmp_printf_no_lock(" GTID %d\n", gtid);

          (char *)p1 += page_size;
        } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
        __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,

      __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                           (char *)p1 + (page_size - 1),
                           __kmp_get_host_node(p1));
      __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                           (char *)p2 + (page_size - 1),
                           __kmp_get_host_node(p2));
      __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));

  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
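// Emit an "OMP warning" message on kmp_err unless warnings are disabled.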
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];

  if (__kmp_generate_warnings == kmp_warnings_off) {

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
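// Terminate the process on a fatal error: dump the debug buffer if requested,
// record the abort on Windows, unregister the library, and never return.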
void __kmp_abort_process() {
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();

  if (KMP_OS_WINDOWS) {
    __kmp_global.g.g_abort = SIGABRT;

  __kmp_unregister_library();

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
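// Dump the storage map entries for one kmp_info_t: the thread descriptor,
// its local data, and the per-barrier th_bar entries.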
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
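// Memory allocator setup/teardown hooks (memkind and target memory support).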
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();

static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
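// Windows DLL entry point: tears down the whole library or just the current
// thread's runtime state on PROCESS_DETACH / THREAD_DETACH notifications.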
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
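// Ordered-section entry hook: with BUILD_PARALLEL_ORDERED, wait until this
// thread's tid matches the team's ordered ticket value.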
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
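// Ordered-section exit hook: pass the ordered ticket to the next thread in
// the team.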
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {

    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
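// Decide whether the calling thread executes a SINGLE construct, using an
// atomic compare-and-store on the team's t_construct counter.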
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;

    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      __kmp_itt_metadata_single(id_ref);

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
      __kmp_check_workshare(gtid, ct_psingle, id_ref);

    __kmp_itt_single_start(gtid);
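// Leave a SINGLE region: end ITT tracking and pop the workshare when
// consistency checking is on.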
void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
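// Compute how many threads a new team may actually get, applying dynamic
// adjustment, device and contention-group thread limits, and the capacity of
// the __kmp_threads array; a result of 1 means the region will serialize.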
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
      new_nthreads = set_nthreads;
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));

  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {

  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;

  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);

  if (new_nthreads == 1) {
    ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
     "dead roots and rechecking; requested %d threads\n",
     __kmp_get_gtid(), set_nthreads));

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                __kmp_get_gtid(), new_nthreads, set_nthreads));
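// Populate a newly allocated team: install the primary thread in slot 0,
// allocate or reuse workers (hot teams), sync their barrier counters, and
// partition affinity places unless forking the workers of a teams construct.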
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
    int level = team->t.t_active_level - 1;
    if (master_th->th.th_teams_microtask) {
      if (master_th->th.th_teams_size.nteams > 1) {
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        hot_teams[level].hot_team = team;
        hot_teams[level].hot_team_nth = team->t.t_nproc;
  use_hot_team = team == root->r.r_hot_team;

    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    for (i = 1; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);

      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;

      kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
      for (b = 0; b < bs_last_barrier; ++b) {
        balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
        KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
        balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;

#if KMP_AFFINITY_SUPPORTED
    if (!fork_teams_workers) {
      __kmp_partition_places(team);

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
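// On x86, propagate the primary thread's x87/MXCSR state to the team and
// restore it in workers; on other architectures these are no-ops.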
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);

    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);

#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc);
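// Execute a parallel region serialized: (re)use the thread's serial team,
// push ICVs and dispatch buffers for the nested level, and emit the OMPT
// events the tools interface expects.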
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  this_thr->th.th_set_proc_bind = proc_bind_default;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);

  if (this_thr->th.th_team != serial_team) {
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
       global_tid, serial_team));

      ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
       global_tid, serial_team));

    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];

    serial_team->t.t_pkfn = (microtask_t)(~0);
    this_thr->th.th_info.ds.ds_tid = 0;

    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;

    propagateFPControl(serial_team);

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    int level = this_thr->th.th_team->t.t_level;

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    dispatch_private_info_t *disp_buffer =
        (dispatch_private_info_t *)__kmp_allocate(
            sizeof(dispatch_private_info_t));
    disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;

    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);

    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);

    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
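// Main entry for starting a parallel region: decides between serialized
// execution, the teams-construct special cases, or allocating and forking a
// real team, and drives the OMPT/ITT instrumentation around the fork.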
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context,
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,

  int master_this_cons;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  int master_set_numthreads;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);

    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  master_th = __kmp_threads[gtid];

  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);

  __kmp_assign_root_init_mask();

  level = parent_team->t.t_level;
  active_level = parent_team->t.t_active_level;
  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    (*p_hot_teams)[0].hot_team_nth = 1;

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  master_th->th.th_ident = loc;

  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {

    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);

    if (parent_team == master_th->th.th_serial_team) {
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

      if (call_context == fork_context_gnu) {
        parent_team->t.t_serialized--;

      parent_team->t.t_pkfn = microtask;

      void **exit_frame_p;
      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
        exit_frame_p = &dummy;

      parent_team->t.t_serialized--;

      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context) | ompt_parallel_team,
        master_th->th.ompt_thread_info.state = ompt_state_overhead;

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_ATOMIC_INC(&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;
    parent_team->t.t_def_allocator = master_th->th.th_def_allocator;

    if (ompt_enabled.enabled) {
      ompt_lw_taskteam_t lw_taskteam;
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);

    if (master_set_numthreads) {
      if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
        kmp_info_t **other_threads = parent_team->t.t_threads;
        int old_proc = master_th->th.th_teams_size.nth;
        if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
          __kmp_resize_dist_barrier(parent_team, old_proc,
                                    master_set_numthreads);
          __kmp_add_threads_to_team(parent_team, master_set_numthreads);
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;

      master_th->th.th_set_nproc = 0;
    if (__kmp_debugging) {
      int nth = __kmp_omp_num_threads(loc);
        master_set_numthreads = nth;

    kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
    kmp_proc_bind_t proc_bind_icv = proc_bind_default;
    if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
      proc_bind = proc_bind_false;
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];

    KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
    if (proc_bind_icv != proc_bind_default &&
        master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
      kmp_info_t **other_threads = parent_team->t.t_threads;
      for (i = 0; i < master_th->th.th_team_nproc; ++i) {
        other_threads[i]->th.th_current_task->td_icvs.proc_bind =
    master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
        __kmp_forkjoin_frames_mode == 3 &&
        parent_team->t.t_active_level == 1 &&
        master_th->th.th_teams_size.nteams == 1) {
      kmp_uint64 tmp_time = __itt_get_timestamp();
      master_th->th.th_frame_time = tmp_time;
      parent_team->t.t_region_time = tmp_time;
    if (__itt_stack_caller_create_ptr) {
      KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
      parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

#if KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(parent_team);

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    if (call_context == fork_context_gnu)

    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    if (!parent_team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);

  int enter_teams = 0;
  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    enter_teams = ((ap == NULL && active_level == 0) ||
                   (ap && teams_level > 0 && teams_level == level));
  nthreads = master_set_numthreads
                 ? master_set_numthreads
                 : get__nproc_2(parent_team, master_tid);

  if ((get__max_active_levels(master_th) == 1 &&
       (root->r.r_in_parallel && !enter_teams)) ||
      (__kmp_library == library_serial)) {
    KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
    nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                     nthreads, enter_teams);
    if (nthreads == 1) {
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  KMP_DEBUG_ASSERT(nthreads > 0);

  master_th->th.th_set_nproc = 0;
  if (nthreads == 1) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));

    ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    master_th->th.th_serial_team->t.t_pkfn = microtask;

    if (call_context == fork_context_intel) {
      master_th->th.th_serial_team->t.t_ident = loc;
        master_th->th.th_serial_team->t.t_level--;

        void **exit_frame_p;
        ompt_task_info_t *task_info;
        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);
          if (ompt_enabled.ompt_callback_implicit_task) {
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);

          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
          exit_frame_p = &dummy;

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc,
                               parent_team->t.t_argv

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);
          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);
        for (i = 0; i < argc; ++i)
          argv[i] = parent_team->t.t_argv[i];

        if (ompt_enabled.enabled) {
          ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 0,
                OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_league,
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
        for (i = argc - 1; i >= 0; --i)
          *argv++ = va_arg(kmp_va_deref(ap), void *);

        void **exit_frame_p;
        ompt_task_info_t *task_info;
        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);

          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);

          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
          exit_frame_p = &dummy;

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, args

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,
          master_th->th.ompt_thread_info.state = ompt_state_overhead;
    } else if (call_context == fork_context_gnu) {
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,

      lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);

      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));

      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));

  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    KMP_ATOMIC_INC(&root->r.r_in_parallel);

  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];

  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];

  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);

  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  propagateFPControl(team);
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) {
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;

        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);

          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {
        master_th->th.th_task_state =
            .th_task_state_memo_stack[master_th->th.th_task_state_top];
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS

#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));

  ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
   gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));

  argv = (void **)team->t.t_argv;
    for (i = argc - 1; i >= 0; --i) {
      void *new_argv = va_arg(kmp_va_deref(ap), void *);
      KMP_CHECK_UPDATE(*argv, new_argv);
    for (i = 0; i < argc; ++i) {
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);

  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active)
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
  if (team->t.t_active_level == 1 && !master_th->th.th_teams_microtask) {
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);

  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);

  ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
   root, team, master_th, gtid));

  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
    team->t.t_stack_id = __kmp_itt_stack_caller_create();
  } else if (parent_team->t.t_serialized) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED
    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
#if KMP_STATS_ENABLED
    KMP_SET_THREAD_STATE(previous_state);

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
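// OMPT helpers used at join time: restore the thread's work state and report
// the parallel-end event to a registered tool.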
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
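// Join a parallel region: wait for the team in __kmp_internal_join, release
// ITT stack ids, handle the teams-construct special cases, and restore the
// primary thread's state.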
void __kmp_join_call(ident_t *loc, int gtid
                     enum fork_context_e fork_context

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *parent_team;
  kmp_info_t *master_th;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

  void *team_microtask = (void *)team->t.t_pkfn;

  if (ompt_enabled.enabled &&
      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;

  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);

  if (team->t.t_serialized) {
    if (master_th->th.th_teams_microtask) {
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
      } else if (level == tlevel + 1) {
    team->t.t_serialized++;

    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);

  master_active = team->t.t_master_active;

  __kmp_internal_join(loc, gtid, team);

  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
    __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
    team->t.t_stack_id = NULL;

  master_th->th.th_task_state =

  if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
    __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
    parent_team->t.t_stack_id = NULL;

  if (team->t.t_nproc > 1 &&
      __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
    team->t.b->update_num_threads(team->t.t_nproc);
    __kmp_add_threads_to_team(team, team->t.t_nproc);
2444 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2445 void *codeptr = team->t.ompt_team_info.master_return_address;
2450 if (team->t.t_active_level == 1 &&
2451 (!master_th->th.th_teams_microtask ||
2452 master_th->th.th_teams_size.nteams == 1)) {
2453 master_th->th.th_ident = loc;
2456 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2457 __kmp_forkjoin_frames_mode == 3)
2458 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2459 master_th->th.th_frame_time, 0, loc,
2460 master_th->th.th_team_nproc, 1);
2461 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2462 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2463 __kmp_itt_region_joined(gtid);
2467 #if KMP_AFFINITY_SUPPORTED
2470 master_th->th.th_first_place = team->t.t_first_place;
2471 master_th->th.th_last_place = team->t.t_last_place;
2475 if (master_th->th.th_teams_microtask && !exit_teams &&
2476 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2477 team->t.t_level == master_th->th.th_teams_level + 1) {
2482 ompt_data_t ompt_parallel_data = ompt_data_none;
2483 if (ompt_enabled.enabled) {
2484 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2485 if (ompt_enabled.ompt_callback_implicit_task) {
2486 int ompt_team_size = team->t.t_nproc;
2487 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2488 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2489 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2491 task_info->frame.exit_frame = ompt_data_none;
2492 task_info->task_data = ompt_data_none;
2493 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2494 __ompt_lw_taskteam_unlink(master_th);
2499 team->t.t_active_level--;
2500 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (int i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      for (int i = old_num; i < new_num; ++i) {
        KMP_DEBUG_ASSERT(other_threads[i]);
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (int b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
                      OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
    }
2544 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2545 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2547 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2552 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2554 if (!master_th->th.th_teams_microtask ||
2555 team->t.t_level > master_th->th.th_teams_level) {
2557 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2559 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2562 if (ompt_enabled.enabled) {
2563 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2564 if (ompt_enabled.ompt_callback_implicit_task) {
2565 int flags = (team_microtask == (
void *)__kmp_teams_master)
2567 : ompt_task_implicit;
2568 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2569 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2570 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2571 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2573 task_info->frame.exit_frame = ompt_data_none;
2574 task_info->task_data = ompt_data_none;
  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);
2582 master_th->th.th_def_allocator = team->t.t_def_allocator;
2585 if (ompd_state & OMPD_ENABLE_BP)
2586 ompd_bp_parallel_end();
2588 updateHWFPControl(team);
2590 if (root->r.r_active != master_active)
2591 root->r.r_active = master_active;
2593 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2601 master_th->th.th_team = parent_team;
2602 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2603 master_th->th.th_team_master = parent_team->t.t_threads[0];
2604 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2607 if (parent_team->t.t_serialized &&
2608 parent_team != master_th->th.th_serial_team &&
2609 parent_team != root->r.r_root_team) {
2610 __kmp_free_team(root,
2611 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2612 master_th->th.th_serial_team = parent_team;
2615 if (__kmp_tasking_mode != tskm_immediate_exec) {
2616 if (master_th->th.th_task_state_top >
2618 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2620 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2621 master_th->th.th_task_state;
2622 --master_th->th.th_task_state_top;
2624 master_th->th.th_task_state =
2626 .th_task_state_memo_stack[master_th->th.th_task_state_top];
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  int flags =
      OMPT_INVOKER(fork_context) |
      ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
                                                      : ompt_parallel_team);
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
                    codeptr);
  }

  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
void __kmp_save_internal_controls(kmp_info_t *thread) {
  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;
    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));
      copy_icvs(control, &thread->th.th_current_task->td_icvs);
      control->serial_nesting_level = thread->th.th_team->t.t_serialized;
      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
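/* Illustrative note (not part of the original source): the record pushed above
   is what allows ICV changes made inside a serialized (nested, inactive)
   parallel region to be undone when that region ends.  A minimal user-level
   sketch, assuming the usual OpenMP API entry points:

     #include <omp.h>
     void nested_icv_example(void) {
       #pragma omp parallel num_threads(2)   // outer, active region
       #pragma omp parallel num_threads(2)   // nested, serialized on most configs
       {
         omp_set_num_threads(8); // reaches __kmp_set_num_threads(), which calls
                                 // __kmp_save_internal_controls() first
       } // popping the saved record lets the previous nproc be restored
     }
*/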
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref the task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}
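/* Illustrative note (not part of the original source): __kmp_set_num_threads()
   is the runtime-side worker that a user call such as omp_set_num_threads() is
   expected to reach.  Besides updating the nproc ICV, it eagerly shrinks the
   hot team while the root is idle, so surplus workers return to the thread
   pool right away instead of at the next fork.  A minimal sketch, assuming the
   standard OpenMP API:

     #include <omp.h>
     void shrink_example(void) {
       #pragma omp parallel num_threads(8)
       { }                      // hot team now holds 8 threads
       omp_set_num_threads(2);  // hot team may be trimmed to 2 here, not at the
                                // next fork (subject to the guards above)
       #pragma omp parallel     // forks with the already-resized hot team
       { }
     }
*/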
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // The call is ignored: the last valid setting will be used.
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // OK: max_active_levels is within [0; KMP_MAX_ACTIVE_LEVELS_LIMIT]
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
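/* Illustrative note (not part of the original source): the validation above is
   deliberately forgiving -- a negative value is ignored with a warning, and a
   value above KMP_MAX_ACTIVE_LEVELS_LIMIT is clamped.  A minimal sketch of the
   user-visible effect, assuming the standard OpenMP API:

     #include <omp.h>
     void levels_example(void) {
       omp_set_max_active_levels(-3);      // ignored, previous value kept
       omp_set_max_active_levels(1 << 30); // clamped to the runtime limit
       omp_set_max_active_levels(2);       // accepted as-is
     }
*/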
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }

void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
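/* Illustrative note (not part of the original source): these four helpers hold
   the nteams-var and teams-thread-limit-var per-device ICVs and are the obvious
   backing for the OpenMP 5.1 calls omp_set_num_teams()/omp_get_max_teams() and
   omp_set_teams_thread_limit()/omp_get_teams_thread_limit() (the exact
   entry-point wiring lives elsewhere in the runtime).  Sketch:

     #include <omp.h>
     void teams_icv_example(void) {
       omp_set_num_teams(4);          // -> __kmp_set_num_teams(4)
       omp_set_teams_thread_limit(8); // -> __kmp_set_teams_thread_limit(8)
       #pragma omp teams              // league bounded by the two ICVs above
       { }
     }
*/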
KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
// Changes the schedule kind and chunk for the calling thread.
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  orig_kind = kind;
  kind = __kmp_sched_without_mods(kind);

  // Check if the kind parameter is valid, correct if needed.
  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: an invalid chunk indicates the
      // unchunked (default) static schedule
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  __kmp_sched_apply_mods_intkind(
      orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
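/* Illustrative note (not part of the original source): __kmp_set_schedule()
   translates the public kmp_sched_t kinds into internal sched_type values via
   __kmp_sch_map, so a user-level call like

     #include <omp.h>
     void sched_example(void) {
       omp_set_schedule(omp_sched_dynamic, 4); // dynamic, chunk 4
       omp_set_schedule(omp_sched_static, 0);  // chunk < 1 => default chunk
       omp_set_schedule(omp_sched_auto, 100);  // chunk ignored for auto
     }

   ends up storing something like kmp_sch_dynamic_chunked / kmp_sch_static /
   kmp_sch_auto in the current task's sched ICV.  The exact internal kinds
   depend on the __kmp_sch_map table and on monotonic modifiers, so treat this
   as a sketch rather than a guarantee.
*/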
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
  switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    __kmp_sched_apply_mods_stdkind(kind, th_type);
    *chunk = 0; // chunk was not set, report this via a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_chunked:
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  __kmp_sched_apply_mods_stdkind(kind, th_type);
  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
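/* Illustrative note (not part of the original source): __kmp_get_schedule() is
   the inverse mapping -- several internal static variants (greedy, balanced,
   plain) all report back as the single public "static" kind.  Sketch of a round
   trip, assuming the standard API:

     #include <omp.h>
     #include <stdio.h>
     void query_example(void) {
       omp_sched_t kind;
       int chunk;
       omp_set_schedule(omp_sched_guided, 8);
       omp_get_schedule(&kind, &chunk); // kind == omp_sched_guided, chunk == 8
       printf("kind=%d chunk=%d\n", (int)kind, chunk);
     }
*/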
2951 int __kmp_get_ancestor_thread_num(
int gtid,
int level) {
2957 KF_TRACE(10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2958 KMP_DEBUG_ASSERT(__kmp_init_serial);
2965 thr = __kmp_threads[gtid];
2966 team = thr->th.th_team;
2967 ii = team->t.t_level;
2971 if (thr->th.th_teams_microtask) {
2973 int tlevel = thr->th.th_teams_level;
2976 KMP_DEBUG_ASSERT(ii >= tlevel);
2988 return __kmp_tid_from_gtid(gtid);
2990 dd = team->t.t_serialized;
2992 while (ii > level) {
2993 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
2995 if ((team->t.t_serialized) && (!dd)) {
2996 team = team->t.t_parent;
3000 team = team->t.t_parent;
3001 dd = team->t.t_serialized;
3006 return (dd > 1) ? (0) : (team->t.t_master_tid);
3009 int __kmp_get_team_size(
int gtid,
int level) {
3015 KF_TRACE(10, (
"__kmp_get_team_size: thread %d %d\n", gtid, level));
3016 KMP_DEBUG_ASSERT(__kmp_init_serial);
3023 thr = __kmp_threads[gtid];
3024 team = thr->th.th_team;
3025 ii = team->t.t_level;
3029 if (thr->th.th_teams_microtask) {
3031 int tlevel = thr->th.th_teams_level;
3034 KMP_DEBUG_ASSERT(ii >= tlevel);
3045 while (ii > level) {
3046 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3048 if (team->t.t_serialized && (!dd)) {
3049 team = team->t.t_parent;
3053 team = team->t.t_parent;
3058 return team->t.t_nproc;
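/* Illustrative note (not part of the original source): the two walks above
   (__kmp_get_ancestor_thread_num and __kmp_get_team_size) climb t_parent links,
   counting serialized levels, to answer the level-indexed queries behind
   omp_get_ancestor_thread_num(level) and omp_get_team_size(level).  Sketch:

     #include <omp.h>
     #include <stdio.h>
     void ancestry_example(void) {
       #pragma omp parallel num_threads(2)
       #pragma omp parallel num_threads(3)
       {
         int lvl = omp_get_level();                      // 2 (nesting depth)
         int outer_tid = omp_get_ancestor_thread_num(1); // tid in outer team
         int outer_size = omp_get_team_size(1);          // 2 in this sketch
         printf("lvl=%d outer_tid=%d outer_size=%d\n", lvl, outer_tid,
                outer_size);
       }
     }
*/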
3061 kmp_r_sched_t __kmp_get_schedule_global() {
3066 kmp_r_sched_t r_sched;
3072 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3073 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3076 r_sched.r_sched_type = __kmp_static;
3079 r_sched.r_sched_type = __kmp_guided;
3081 r_sched.r_sched_type = __kmp_sched;
3083 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3085 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3087 r_sched.chunk = KMP_DEFAULT_CHUNK;
3089 r_sched.chunk = __kmp_chunk;
3097 static void __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc) {
3099 KMP_DEBUG_ASSERT(team);
3100 if (!realloc || argc > team->t.t_max_argc) {
3102 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3103 "current entries=%d\n",
3104 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3106 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3107 __kmp_free((
void *)team->t.t_argv);
3109 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3111 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3112 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d "
3114 team->t.t_id, team->t.t_max_argc));
3115 team->t.t_argv = &team->t.t_inline_argv[0];
3116 if (__kmp_storage_map) {
3117 __kmp_print_storage_map_gtid(
3118 -1, &team->t.t_inline_argv[0],
3119 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3120 (
sizeof(
void *) * KMP_INLINE_ARGV_ENTRIES),
"team_%d.t_inline_argv",
3125 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3126 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3128 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3130 team->t.t_id, team->t.t_max_argc));
3132 (
void **)__kmp_page_allocate(
sizeof(
void *) * team->t.t_max_argc);
3133 if (__kmp_storage_map) {
3134 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3135 &team->t.t_argv[team->t.t_max_argc],
3136 sizeof(
void *) * team->t.t_max_argc,
3137 "team_%d.t_argv", team->t.t_id);
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
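/* Illustrative note (not part of the original source): a team gets
   __kmp_dispatch_num_buffers shared dispatch buffers (at least 2) so that
   consecutive dynamically-scheduled loops can be set up round-robin without
   waiting for every thread to drain the previous loop's buffer.  Hypothetical
   sketch of the selection the dispatch code performs later:

     // dispatch_shared_info_t *sh =
     //     &team->t.t_disp_buffer[loop_ordinal % __kmp_dispatch_num_buffers];

   The buffer_index/doacross_buf_idx fields are pre-seeded with the slot number
   above so the first loop hitting each slot finds a consistent index.
*/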
static void __kmp_free_team_arrays(kmp_team_t *team) {
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
#if KMP_USE_HIER_SCHED
  __kmp_dispatch_free_hierarchies(team);
#endif
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
3199 static kmp_internal_control_t __kmp_get_global_icvs(
void) {
3201 kmp_r_sched_t r_sched =
3202 __kmp_get_schedule_global();
3204 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3206 kmp_internal_control_t g_icvs = {
3208 (kmp_int8)__kmp_global.g.g_dynamic,
3210 (kmp_int8)__kmp_env_blocktime,
3212 __kmp_dflt_blocktime,
3217 __kmp_dflt_team_nth,
3221 __kmp_dflt_max_active_levels,
3225 __kmp_nested_proc_bind.bind_types[0],
3226 __kmp_default_device,
3233 static kmp_internal_control_t __kmp_get_x_global_icvs(
const kmp_team_t *team) {
3235 kmp_internal_control_t gx_icvs;
3236 gx_icvs.serial_nesting_level =
3238 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3239 gx_icvs.next = NULL;
3244 static void __kmp_initialize_root(kmp_root_t *root) {
3246 kmp_team_t *root_team;
3247 kmp_team_t *hot_team;
3248 int hot_team_max_nth;
3249 kmp_r_sched_t r_sched =
3250 __kmp_get_schedule_global();
3251 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3252 KMP_DEBUG_ASSERT(root);
3253 KMP_ASSERT(!root->r.r_begin);
3256 __kmp_init_lock(&root->r.r_begin_lock);
3257 root->r.r_begin = FALSE;
3258 root->r.r_active = FALSE;
3259 root->r.r_in_parallel = 0;
3260 root->r.r_blocktime = __kmp_dflt_blocktime;
3261 #if KMP_AFFINITY_SUPPORTED
3262 root->r.r_affinity_assigned = FALSE;
3267 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3270 __kmp_allocate_team(root,
3276 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3278 USE_NESTED_HOT_ARG(NULL)
3283 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3286 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3288 root->r.r_root_team = root_team;
3289 root_team->t.t_control_stack_top = NULL;
3292 root_team->t.t_threads[0] = NULL;
3293 root_team->t.t_nproc = 1;
3294 root_team->t.t_serialized = 1;
3296 root_team->t.t_sched.sched = r_sched.sched;
3299 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3300 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3304 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3307 __kmp_allocate_team(root,
3309 __kmp_dflt_team_nth_ub * 2,
3313 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3315 USE_NESTED_HOT_ARG(NULL)
3317 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3319 root->r.r_hot_team = hot_team;
3320 root_team->t.t_control_stack_top = NULL;
3323 hot_team->t.t_parent = root_team;
3326 hot_team_max_nth = hot_team->t.t_max_nproc;
3327 for (f = 0; f < hot_team_max_nth; ++f) {
3328 hot_team->t.t_threads[f] = NULL;
3330 hot_team->t.t_nproc = 1;
3332 hot_team->t.t_sched.sched = r_sched.sched;
3333 hot_team->t.t_size_changed = 0;
3338 typedef struct kmp_team_list_item {
3339 kmp_team_p
const *entry;
3340 struct kmp_team_list_item *next;
3341 } kmp_team_list_item_t;
3342 typedef kmp_team_list_item_t *kmp_team_list_t;
3344 static void __kmp_print_structure_team_accum(
3345 kmp_team_list_t list,
3346 kmp_team_p
const *team
3356 KMP_DEBUG_ASSERT(list != NULL);
3361 __kmp_print_structure_team_accum(list, team->t.t_parent);
3362 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3366 while (l->next != NULL && l->entry != team) {
3369 if (l->next != NULL) {
3375 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3381 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3382 sizeof(kmp_team_list_item_t));
3389 static void __kmp_print_structure_team(
char const *title, kmp_team_p
const *team
3392 __kmp_printf(
"%s", title);
3394 __kmp_printf(
"%2x %p\n", team->t.t_id, team);
3396 __kmp_printf(
" - (nil)\n");
3400 static void __kmp_print_structure_thread(
char const *title,
3401 kmp_info_p
const *thread) {
3402 __kmp_printf(
"%s", title);
3403 if (thread != NULL) {
3404 __kmp_printf(
"%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3406 __kmp_printf(
" - (nil)\n");
3410 void __kmp_print_structure(
void) {
3412 kmp_team_list_t list;
3416 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3420 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3421 "Table\n------------------------------\n");
3424 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3425 __kmp_printf(
"%2d", gtid);
3426 if (__kmp_threads != NULL) {
3427 __kmp_printf(
" %p", __kmp_threads[gtid]);
3429 if (__kmp_root != NULL) {
3430 __kmp_printf(
" %p", __kmp_root[gtid]);
3437 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3439 if (__kmp_threads != NULL) {
3441 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3442 kmp_info_t
const *thread = __kmp_threads[gtid];
3443 if (thread != NULL) {
3444 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3445 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3446 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3447 __kmp_print_structure_team(
" Serial Team: ",
3448 thread->th.th_serial_team);
3449 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3450 __kmp_print_structure_thread(
" Primary: ",
3451 thread->th.th_team_master);
3452 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3453 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3454 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3455 __kmp_print_structure_thread(
" Next in pool: ",
3456 thread->th.th_next_pool);
3458 __kmp_print_structure_team_accum(list, thread->th.th_team);
3459 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3463 __kmp_printf(
"Threads array is not allocated.\n");
3467 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3469 if (__kmp_root != NULL) {
3471 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3472 kmp_root_t
const *root = __kmp_root[gtid];
3474 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3475 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3476 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3477 __kmp_print_structure_thread(
" Uber Thread: ",
3478 root->r.r_uber_thread);
3479 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3480 __kmp_printf(
" In Parallel: %2d\n",
3481 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3483 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3484 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3488 __kmp_printf(
"Ubers array is not allocated.\n");
3491 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3493 while (list->next != NULL) {
3494 kmp_team_p
const *team = list->entry;
3496 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3497 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3498 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3499 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3500 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3501 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3502 for (i = 0; i < team->t.t_nproc; ++i) {
3503 __kmp_printf(
" Thread %2d: ", i);
3504 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3506 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3512 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3514 __kmp_print_structure_thread(
"Thread pool: ",
3515 CCAST(kmp_info_t *, __kmp_thread_pool));
3516 __kmp_print_structure_team(
"Team pool: ",
3517 CCAST(kmp_team_t *, __kmp_team_pool));
3521 while (list != NULL) {
3522 kmp_team_list_item_t *item = list;
3524 KMP_INTERNAL_FREE(item);
3533 static const unsigned __kmp_primes[] = {
3534 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3535 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3536 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3537 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3538 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3539 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3540 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3541 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3542 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3543 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3544 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = (unsigned short)(x >> 16);

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
3574 static int __kmp_reclaim_dead_roots(
void) {
3577 for (i = 0; i < __kmp_threads_capacity; ++i) {
3578 if (KMP_UBER_GTID(i) &&
3579 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3582 r += __kmp_unregister_root_other_thread(i);
3607 static int __kmp_expand_threads(
int nNeed) {
3609 int minimumRequiredCapacity;
3611 kmp_info_t **newThreads;
3612 kmp_root_t **newRoot;
3618 #if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3621 added = __kmp_reclaim_dead_roots();
3650 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3653 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3657 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3659 newCapacity = __kmp_threads_capacity;
3661 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3662 : __kmp_sys_max_nth;
3663 }
while (newCapacity < minimumRequiredCapacity);
3664 newThreads = (kmp_info_t **)__kmp_allocate(
3665 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3667 (kmp_root_t **)((
char *)newThreads +
sizeof(kmp_info_t *) * newCapacity);
3668 KMP_MEMCPY(newThreads, __kmp_threads,
3669 __kmp_threads_capacity *
sizeof(kmp_info_t *));
3670 KMP_MEMCPY(newRoot, __kmp_root,
3671 __kmp_threads_capacity *
sizeof(kmp_root_t *));
3674 kmp_old_threads_list_t *node =
3675 (kmp_old_threads_list_t *)__kmp_allocate(
sizeof(kmp_old_threads_list_t));
3676 node->threads = __kmp_threads;
3677 node->next = __kmp_old_threads_list;
3678 __kmp_old_threads_list = node;
3680 *(kmp_info_t * *
volatile *)&__kmp_threads = newThreads;
3681 *(kmp_root_t * *
volatile *)&__kmp_root = newRoot;
3682 added += newCapacity - __kmp_threads_capacity;
3683 *(
volatile int *)&__kmp_threads_capacity = newCapacity;
3685 if (newCapacity > __kmp_tp_capacity) {
3686 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3687 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3688 __kmp_threadprivate_resize_cache(newCapacity);
3690 *(
volatile int *)&__kmp_tp_capacity = newCapacity;
3692 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
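/* Illustrative note (not part of the original source): __kmp_expand_threads()
   grows the combined __kmp_threads/__kmp_root allocation geometrically --
   capacity doubles until it covers the request or hits __kmp_sys_max_nth -- and
   the old arrays are parked on __kmp_old_threads_list rather than freed, since
   other threads may still hold pointers into them.  The doubling loop above is
   equivalent to this sketch:

     // int newCapacity = __kmp_threads_capacity;
     // do {
     //   newCapacity = (newCapacity <= (__kmp_sys_max_nth >> 1))
     //                     ? (newCapacity << 1)
     //                     : __kmp_sys_max_nth;
     // } while (newCapacity < minimumRequiredCapacity);
*/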
3701 int __kmp_register_root(
int initial_thread) {
3702 kmp_info_t *root_thread;
3706 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3707 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3724 capacity = __kmp_threads_capacity;
3725 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3732 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3733 capacity -= __kmp_hidden_helper_threads_num;
3737 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3738 if (__kmp_tp_cached) {
3739 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3740 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3741 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3743 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3753 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3756 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3757 gtid <= __kmp_hidden_helper_threads_num;
3760 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3761 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3762 "hidden helper thread: T#%d\n",
3768 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3771 for (gtid = __kmp_hidden_helper_threads_num + 1;
3772 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3776 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3777 KMP_ASSERT(gtid < __kmp_threads_capacity);
3782 TCW_4(__kmp_nth, __kmp_nth + 1);
3786 if (__kmp_adjust_gtid_mode) {
3787 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3788 if (TCR_4(__kmp_gtid_mode) != 2) {
3789 TCW_4(__kmp_gtid_mode, 2);
3792 if (TCR_4(__kmp_gtid_mode) != 1) {
3793 TCW_4(__kmp_gtid_mode, 1);
3798 #ifdef KMP_ADJUST_BLOCKTIME
3801 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3802 if (__kmp_nth > __kmp_avail_proc) {
3803 __kmp_zero_bt = TRUE;
3809 if (!(root = __kmp_root[gtid])) {
3810 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3811 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3814 #if KMP_STATS_ENABLED
3816 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3817 __kmp_stats_thread_ptr->startLife();
3818 KMP_SET_THREAD_STATE(SERIAL_REGION);
3821 __kmp_initialize_root(root);
3824 if (root->r.r_uber_thread) {
3825 root_thread = root->r.r_uber_thread;
3827 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3828 if (__kmp_storage_map) {
3829 __kmp_print_thread_storage_map(root_thread, gtid);
3831 root_thread->th.th_info.ds.ds_gtid = gtid;
3833 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3835 root_thread->th.th_root = root;
3836 if (__kmp_env_consistency_check) {
3837 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3840 __kmp_initialize_fast_memory(root_thread);
3844 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3845 __kmp_initialize_bget(root_thread);
3847 __kmp_init_random(root_thread);
3851 if (!root_thread->th.th_serial_team) {
3852 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3853 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3854 root_thread->th.th_serial_team = __kmp_allocate_team(
3859 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3861 KMP_ASSERT(root_thread->th.th_serial_team);
3862 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3863 root_thread->th.th_serial_team));
3866 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3868 root->r.r_root_team->t.t_threads[0] = root_thread;
3869 root->r.r_hot_team->t.t_threads[0] = root_thread;
3870 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3872 root_thread->th.th_serial_team->t.t_serialized = 0;
3873 root->r.r_uber_thread = root_thread;
3876 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3877 TCW_4(__kmp_init_gtid, TRUE);
3880 __kmp_gtid_set_specific(gtid);
3883 __kmp_itt_thread_name(gtid);
3886 #ifdef KMP_TDATA_GTID
3889 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3890 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3892 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3894 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3895 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3896 KMP_INIT_BARRIER_STATE));
3899 for (b = 0; b < bs_last_barrier; ++b) {
3900 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3902 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3906 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3907 KMP_INIT_BARRIER_STATE);
3909 #if KMP_AFFINITY_SUPPORTED
3910 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3911 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3912 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3913 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3915 root_thread->th.th_def_allocator = __kmp_def_allocator;
3916 root_thread->th.th_prev_level = 0;
3917 root_thread->th.th_prev_num_threads = 1;
3919 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3920 tmp->cg_root = root_thread;
3921 tmp->cg_thread_limit = __kmp_cg_max_nth;
3922 tmp->cg_nthreads = 1;
3923 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3924 " cg_nthreads init to 1\n",
3927 root_thread->th.th_cg_roots = tmp;
3929 __kmp_root_counter++;
3932 if (!initial_thread && ompt_enabled.enabled) {
3934 kmp_info_t *root_thread = ompt_get_thread();
3936 ompt_set_thread_state(root_thread, ompt_state_overhead);
3938 if (ompt_enabled.ompt_callback_thread_begin) {
3939 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3940 ompt_thread_initial, __ompt_get_thread_data_internal());
3942 ompt_data_t *task_data;
3943 ompt_data_t *parallel_data;
3944 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
3946 if (ompt_enabled.ompt_callback_implicit_task) {
3947 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3948 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3951 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3955 if (ompd_state & OMPD_ENABLE_BP)
3956 ompd_bp_thread_begin();
3960 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3965 #if KMP_NESTED_HOT_TEAMS
3966 static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
3967 const int max_level) {
3969 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3970 if (!hot_teams || !hot_teams[level].hot_team) {
3973 KMP_DEBUG_ASSERT(level < max_level);
3974 kmp_team_t *team = hot_teams[level].hot_team;
3975 nth = hot_teams[level].hot_team_nth;
3977 if (level < max_level - 1) {
3978 for (i = 0; i < nth; ++i) {
3979 kmp_info_t *th = team->t.t_threads[i];
3980 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3981 if (i > 0 && th->th.th_hot_teams) {
3982 __kmp_free(th->th.th_hot_teams);
3983 th->th.th_hot_teams = NULL;
3987 __kmp_free_team(root, team, NULL);
3994 static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
3995 kmp_team_t *root_team = root->r.r_root_team;
3996 kmp_team_t *hot_team = root->r.r_hot_team;
3997 int n = hot_team->t.t_nproc;
4000 KMP_DEBUG_ASSERT(!root->r.r_active);
4002 root->r.r_root_team = NULL;
4003 root->r.r_hot_team = NULL;
4006 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4007 #if KMP_NESTED_HOT_TEAMS
4008 if (__kmp_hot_teams_max_level >
4010 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4011 kmp_info_t *th = hot_team->t.t_threads[i];
4012 if (__kmp_hot_teams_max_level > 1) {
4013 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4015 if (th->th.th_hot_teams) {
4016 __kmp_free(th->th.th_hot_teams);
4017 th->th.th_hot_teams = NULL;
4022 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4027 if (__kmp_tasking_mode != tskm_immediate_exec) {
4028 __kmp_wait_to_unref_task_teams();
4034 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4036 (LPVOID) & (root->r.r_uber_thread->th),
4037 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4038 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4042 if (ompd_state & OMPD_ENABLE_BP)
4043 ompd_bp_thread_end();
4047 ompt_data_t *task_data;
4048 ompt_data_t *parallel_data;
4049 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,
4051 if (ompt_enabled.ompt_callback_implicit_task) {
4052 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4053 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4055 if (ompt_enabled.ompt_callback_thread_end) {
4056 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4057 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4063 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4064 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4066 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4067 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4070 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4071 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4072 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4073 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4074 root->r.r_uber_thread->th.th_cg_roots = NULL;
4076 __kmp_reap_thread(root->r.r_uber_thread, 1);
4080 root->r.r_uber_thread = NULL;
4082 root->r.r_begin = FALSE;
4087 void __kmp_unregister_root_current_thread(
int gtid) {
4088 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4092 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4093 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4094 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4097 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4100 kmp_root_t *root = __kmp_root[gtid];
4102 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4103 KMP_ASSERT(KMP_UBER_GTID(gtid));
4104 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4105 KMP_ASSERT(root->r.r_active == FALSE);
4109 kmp_info_t *thread = __kmp_threads[gtid];
4110 kmp_team_t *team = thread->th.th_team;
4111 kmp_task_team_t *task_team = thread->th.th_task_team;
4114 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4115 task_team->tt.tt_hidden_helper_task_encountered)) {
4118 thread->th.ompt_thread_info.state = ompt_state_undefined;
4120 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4123 __kmp_reset_root(gtid, root);
4127 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4129 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {
  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf(
      "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
      "ptask=%p\n",
      gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
      team->t.t_implicit_task_taskdata[tid].td_parent);
}
4173 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4174 int tid,
int gtid) {
4178 KMP_DEBUG_ASSERT(this_thr != NULL);
4179 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4180 KMP_DEBUG_ASSERT(team);
4181 KMP_DEBUG_ASSERT(team->t.t_threads);
4182 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4183 kmp_info_t *master = team->t.t_threads[0];
4184 KMP_DEBUG_ASSERT(master);
4185 KMP_DEBUG_ASSERT(master->th.th_root);
4189 TCW_SYNC_PTR(this_thr->th.th_team, team);
4191 this_thr->th.th_info.ds.ds_tid = tid;
4192 this_thr->th.th_set_nproc = 0;
4193 if (__kmp_tasking_mode != tskm_immediate_exec)
4196 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4198 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4199 this_thr->th.th_set_proc_bind = proc_bind_default;
4200 #if KMP_AFFINITY_SUPPORTED
4201 this_thr->th.th_new_place = this_thr->th.th_current_place;
4203 this_thr->th.th_root = master->th.th_root;
4206 this_thr->th.th_team_nproc = team->t.t_nproc;
4207 this_thr->th.th_team_master = master;
4208 this_thr->th.th_team_serialized = team->t.t_serialized;
4210 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4212 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4213 tid, gtid, this_thr, this_thr->th.th_current_task));
4215 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4218 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4219 tid, gtid, this_thr, this_thr->th.th_current_task));
4224 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4226 this_thr->th.th_local.this_construct = 0;
4228 if (!this_thr->th.th_pri_common) {
4229 this_thr->th.th_pri_common =
4230 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4231 if (__kmp_storage_map) {
4232 __kmp_print_storage_map_gtid(
4233 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4234 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4236 this_thr->th.th_pri_head = NULL;
4239 if (this_thr != master &&
4240 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4242 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4243 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4246 int i = tmp->cg_nthreads--;
4247 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4248 " on node %p of thread %p to %d\n",
4249 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4254 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4256 this_thr->th.th_cg_roots->cg_nthreads++;
4257 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4258 " node %p of thread %p to %d\n",
4259 this_thr, this_thr->th.th_cg_roots,
4260 this_thr->th.th_cg_roots->cg_root,
4261 this_thr->th.th_cg_roots->cg_nthreads));
4262 this_thr->th.th_current_task->td_icvs.thread_limit =
4263 this_thr->th.th_cg_roots->cg_thread_limit;
4268 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4271 sizeof(dispatch_private_info_t) *
4272 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4273 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4274 team->t.t_max_nproc));
4275 KMP_ASSERT(dispatch);
4276 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4277 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4279 dispatch->th_disp_index = 0;
4280 dispatch->th_doacross_buf_idx = 0;
4281 if (!dispatch->th_disp_buffer) {
4282 dispatch->th_disp_buffer =
4283 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4285 if (__kmp_storage_map) {
4286 __kmp_print_storage_map_gtid(
4287 gtid, &dispatch->th_disp_buffer[0],
4288 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4290 : __kmp_dispatch_num_buffers],
4292 "th_%d.th_dispatch.th_disp_buffer "
4293 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4294 gtid, team->t.t_id, gtid);
4297 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4300 dispatch->th_dispatch_pr_current = 0;
4301 dispatch->th_dispatch_sh_current = 0;
4303 dispatch->th_deo_fcn = 0;
4304 dispatch->th_dxo_fcn = 0;
4307 this_thr->th.th_next_pool = NULL;
4309 if (!this_thr->th.th_task_state_memo_stack) {
4311 this_thr->th.th_task_state_memo_stack =
4312 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4313 this_thr->th.th_task_state_top = 0;
4314 this_thr->th.th_task_state_stack_sz = 4;
4315 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4317 this_thr->th.th_task_state_memo_stack[i] = 0;
4320 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4321 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4331 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4333 kmp_team_t *serial_team;
4334 kmp_info_t *new_thr;
4337 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4338 KMP_DEBUG_ASSERT(root && team);
4339 #if !KMP_NESTED_HOT_TEAMS
4340 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4345 if (__kmp_thread_pool) {
4346 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4347 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4348 if (new_thr == __kmp_thread_pool_insert_pt) {
4349 __kmp_thread_pool_insert_pt = NULL;
4351 TCW_4(new_thr->th.th_in_pool, FALSE);
4352 __kmp_suspend_initialize_thread(new_thr);
4353 __kmp_lock_suspend_mx(new_thr);
4354 if (new_thr->th.th_active_in_pool == TRUE) {
4355 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4356 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4357 new_thr->th.th_active_in_pool = FALSE;
4359 __kmp_unlock_suspend_mx(new_thr);
4361 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4362 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4363 KMP_ASSERT(!new_thr->th.th_team);
4364 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4367 __kmp_initialize_info(new_thr, team, new_tid,
4368 new_thr->th.th_info.ds.ds_gtid);
4369 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4371 TCW_4(__kmp_nth, __kmp_nth + 1);
4373 new_thr->th.th_task_state = 0;
4374 new_thr->th.th_task_state_top = 0;
4375 new_thr->th.th_task_state_stack_sz = 4;
4377 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4379 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4383 #ifdef KMP_ADJUST_BLOCKTIME
4386 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4387 if (__kmp_nth > __kmp_avail_proc) {
4388 __kmp_zero_bt = TRUE;
4397 kmp_balign_t *balign = new_thr->th.th_bar;
4398 for (b = 0; b < bs_last_barrier; ++b)
4399 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4402 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4403 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4410 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4411 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4416 if (!TCR_4(__kmp_init_monitor)) {
4417 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4418 if (!TCR_4(__kmp_init_monitor)) {
4419 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4420 TCW_4(__kmp_init_monitor, 1);
4421 __kmp_create_monitor(&__kmp_monitor);
4422 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4433 while (TCR_4(__kmp_init_monitor) < 2) {
4436 KF_TRACE(10, (
"after monitor thread has started\n"));
4439 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4446 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4448 : __kmp_hidden_helper_threads_num + 1;
4450 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4452 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4455 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4456 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4461 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4463 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4465 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4468 __itt_suppress_mark_range(
4469 __itt_suppress_range, __itt_suppress_threading_errors,
4470 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4471 __itt_suppress_mark_range(
4472 __itt_suppress_range, __itt_suppress_threading_errors,
4473 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4475 __itt_suppress_mark_range(
4476 __itt_suppress_range, __itt_suppress_threading_errors,
4477 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4479 __itt_suppress_mark_range(__itt_suppress_range,
4480 __itt_suppress_threading_errors,
4481 &new_thr->th.th_suspend_init_count,
4482 sizeof(new_thr->th.th_suspend_init_count));
4485 __itt_suppress_mark_range(__itt_suppress_range,
4486 __itt_suppress_threading_errors,
4487 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4488 sizeof(new_thr->th.th_bar[0].bb.b_go));
4489 __itt_suppress_mark_range(__itt_suppress_range,
4490 __itt_suppress_threading_errors,
4491 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4492 sizeof(new_thr->th.th_bar[1].bb.b_go));
4493 __itt_suppress_mark_range(__itt_suppress_range,
4494 __itt_suppress_threading_errors,
4495 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4496 sizeof(new_thr->th.th_bar[2].bb.b_go));
4498 if (__kmp_storage_map) {
4499 __kmp_print_thread_storage_map(new_thr, new_gtid);
4504 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4505 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4506 new_thr->th.th_serial_team = serial_team =
4507 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4511 proc_bind_default, &r_icvs,
4512 0 USE_NESTED_HOT_ARG(NULL));
4514 KMP_ASSERT(serial_team);
4515 serial_team->t.t_serialized = 0;
4517 serial_team->t.t_threads[0] = new_thr;
4519 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4523 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4526 __kmp_initialize_fast_memory(new_thr);
4530 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4531 __kmp_initialize_bget(new_thr);
4534 __kmp_init_random(new_thr);
4538 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4539 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4542 kmp_balign_t *balign = new_thr->th.th_bar;
4543 for (b = 0; b < bs_last_barrier; ++b) {
4544 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4545 balign[b].bb.team = NULL;
4546 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4547 balign[b].bb.use_oncore_barrier = 0;
4550 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4551 new_thr->th.th_sleep_loc_type = flag_unset;
4553 new_thr->th.th_spin_here = FALSE;
4554 new_thr->th.th_next_waiting = 0;
4556 new_thr->th.th_blocking =
false;
4559 #if KMP_AFFINITY_SUPPORTED
4560 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4561 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4562 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4563 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4565 new_thr->th.th_def_allocator = __kmp_def_allocator;
4566 new_thr->th.th_prev_level = 0;
4567 new_thr->th.th_prev_num_threads = 1;
4569 TCW_4(new_thr->th.th_in_pool, FALSE);
4570 new_thr->th.th_active_in_pool = FALSE;
4571 TCW_4(new_thr->th.th_active, TRUE);
4579 if (__kmp_adjust_gtid_mode) {
4580 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4581 if (TCR_4(__kmp_gtid_mode) != 2) {
4582 TCW_4(__kmp_gtid_mode, 2);
4585 if (TCR_4(__kmp_gtid_mode) != 1) {
4586 TCW_4(__kmp_gtid_mode, 1);
4591 #ifdef KMP_ADJUST_BLOCKTIME
4594 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4595 if (__kmp_nth > __kmp_avail_proc) {
4596 __kmp_zero_bt = TRUE;
4603 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4604 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4606 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4608 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the primary thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}
4640 static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
4641 kmp_internal_control_t *new_icvs,
4643 KF_TRACE(10, (
"__kmp_initialize_team: enter: team=%p\n", team));
4646 KMP_DEBUG_ASSERT(team);
4647 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4648 KMP_DEBUG_ASSERT(team->t.t_threads);
4651 team->t.t_master_tid = 0;
4653 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4654 team->t.t_nproc = new_nproc;
4657 team->t.t_next_pool = NULL;
4661 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4662 team->t.t_invoke = NULL;
4665 team->t.t_sched.sched = new_icvs->sched.sched;
4667 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
4668 team->t.t_fp_control_saved = FALSE;
4669 team->t.t_x87_fpu_control_word = 0;
4670 team->t.t_mxcsr = 0;
4673 team->t.t_construct = 0;
4675 team->t.t_ordered.dt.t_value = 0;
4676 team->t.t_master_active = FALSE;
4679 team->t.t_copypriv_data = NULL;
4682 team->t.t_copyin_counter = 0;
4685 team->t.t_control_stack_top = NULL;
4687 __kmp_reinitialize_team(team, new_icvs, loc);
4690 KF_TRACE(10, (
"__kmp_initialize_team: exit: team=%p\n", team));
4693 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
4696 __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4697 if (KMP_AFFINITY_CAPABLE()) {
4699 if (old_mask != NULL) {
4700 status = __kmp_get_system_affinity(old_mask, TRUE);
4703 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4707 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4712 #if KMP_AFFINITY_SUPPORTED
4718 static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4720 if (KMP_HIDDEN_HELPER_TEAM(team))
4723 kmp_info_t *master_th = team->t.t_threads[0];
4724 KMP_DEBUG_ASSERT(master_th != NULL);
4725 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4726 int first_place = master_th->th.th_first_place;
4727 int last_place = master_th->th.th_last_place;
4728 int masters_place = master_th->th.th_current_place;
4729 team->t.t_first_place = first_place;
4730 team->t.t_last_place = last_place;
4732 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4733 "bound to place %d partition = [%d,%d]\n",
4734 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4735 team->t.t_id, masters_place, first_place, last_place));
  switch (proc_bind) {

  case proc_bind_default:
    // Serial teams might have the place partition set; just use the
    // primary thread's place.
    KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
    break;

  case proc_bind_primary: {
    int f;
    int n_th = team->t.t_nproc;
    for (f = 1; f < n_th; f++) {
      kmp_info_t *th = team->t.t_threads[f];
      KMP_DEBUG_ASSERT(th != NULL);
      th->th.th_first_place = first_place;
      th->th.th_last_place = last_place;
      th->th.th_new_place = masters_place;
      if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
          team->t.t_display_affinity != 1) {
        team->t.t_display_affinity = 1;
      }

      KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
                     "partition = [%d,%d]\n",
                     __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                     f, masters_place, first_place, last_place));
    }
  } break;
  case proc_bind_close: {
    int f;
    int n_th = team->t.t_nproc;
    int n_places;
    if (first_place <= last_place) {
      n_places = last_place - first_place + 1;
    } else {
      n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
    }
    if (n_th <= n_places) {
      int place = masters_place;
      for (f = 1; f < n_th; f++) {
        kmp_info_t *th = team->t.t_threads[f];
        KMP_DEBUG_ASSERT(th != NULL);

        if (place == last_place) {
          place = first_place;
        } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
          place = 0;
        } else {
          place++;
        }
4788 th->th.th_first_place = first_place;
4789 th->th.th_last_place = last_place;
4790 th->th.th_new_place = place;
4791 if (__kmp_display_affinity && place != th->th.th_current_place &&
4792 team->t.t_display_affinity != 1) {
4793 team->t.t_display_affinity = 1;
4796 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4797 "partition = [%d,%d]\n",
4798 __kmp_gtid_from_thread(team->t.t_threads[f]),
4799 team->t.t_id, f, place, first_place, last_place));
4802 int S, rem, gap, s_count;
4803 S = n_th / n_places;
4805 rem = n_th - (S * n_places);
4806 gap = rem > 0 ? n_places / rem : n_places;
4807 int place = masters_place;
4809 for (f = 0; f < n_th; f++) {
4810 kmp_info_t *th = team->t.t_threads[f];
4811 KMP_DEBUG_ASSERT(th != NULL);
4813 th->th.th_first_place = first_place;
4814 th->th.th_last_place = last_place;
4815 th->th.th_new_place = place;
4816 if (__kmp_display_affinity && place != th->th.th_current_place &&
4817 team->t.t_display_affinity != 1) {
4818 team->t.t_display_affinity = 1;
4822 if ((s_count == S) && rem && (gap_ct == gap)) {
4824 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4826 if (place == last_place) {
4827 place = first_place;
4828 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4836 }
else if (s_count == S) {
4837 if (place == last_place) {
4838 place = first_place;
4839 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
        KA_TRACE(100,
                 ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
                  "partition = [%d,%d]\n",
                  __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
                  th->th.th_new_place, first_place, last_place));
4854 KMP_DEBUG_ASSERT(place == masters_place);
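    // Illustrative example of the close policy when there are more threads
    // than places (numbers made up): with n_th = 6 and n_places = 4,
    // S = 6 / 4 = 1, rem = 2 and gap = 4 / 2 = 2, so every place receives one
    // thread and every second place receives one extra thread, keeping the
    // per-place load difference at most one.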
4858 case proc_bind_spread: {
4860 int n_th = team->t.t_nproc;
4863 if (first_place <= last_place) {
4864 n_places = last_place - first_place + 1;
4866 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4868 if (n_th <= n_places) {
4871 if (n_places !=
static_cast<int>(__kmp_affinity_num_masks)) {
4872 int S = n_places / n_th;
4873 int s_count, rem, gap, gap_ct;
4875 place = masters_place;
4876 rem = n_places - n_th * S;
4877 gap = rem ? n_th / rem : 1;
        thidx = n_th;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
4883 kmp_info_t *th = team->t.t_threads[f];
4884 KMP_DEBUG_ASSERT(th != NULL);
4886 th->th.th_first_place = place;
4887 th->th.th_new_place = place;
4888 if (__kmp_display_affinity && place != th->th.th_current_place &&
4889 team->t.t_display_affinity != 1) {
4890 team->t.t_display_affinity = 1;
4893 while (s_count < S) {
4894 if (place == last_place) {
4895 place = first_place;
4896 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4903 if (rem && (gap_ct == gap)) {
4904 if (place == last_place) {
4905 place = first_place;
4906 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4914 th->th.th_last_place = place;
4917 if (place == last_place) {
4918 place = first_place;
4919 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
                    f, th->th.th_new_place, th->th.th_first_place,
                    th->th.th_last_place, __kmp_affinity_num_masks));
        double current = static_cast<double>(masters_place);
        double spacing =
            (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
        int first, last;
        kmp_info_t *th;

        thidx = n_th + 1;
        if (update_master_only == 1)
          thidx = 1;
        for (f = 0; f < thidx; f++) {
          first = static_cast<int>(current);
          last = static_cast<int>(current + spacing) - 1;
          KMP_DEBUG_ASSERT(last >= first);
4949 if (first >= n_places) {
4950 if (masters_place) {
4953 if (first == (masters_place + 1)) {
4954 KMP_DEBUG_ASSERT(f == n_th);
4957 if (last == masters_place) {
4958 KMP_DEBUG_ASSERT(f == (n_th - 1));
4962 KMP_DEBUG_ASSERT(f == n_th);
4967 if (last >= n_places) {
4968 last = (n_places - 1);
4973 KMP_DEBUG_ASSERT(0 <= first);
4974 KMP_DEBUG_ASSERT(n_places > first);
4975 KMP_DEBUG_ASSERT(0 <= last);
4976 KMP_DEBUG_ASSERT(n_places > last);
4977 KMP_DEBUG_ASSERT(last_place >= first_place);
4978 th = team->t.t_threads[f];
4979 KMP_DEBUG_ASSERT(th);
4980 th->th.th_first_place = first;
4981 th->th.th_new_place = place;
4982 th->th.th_last_place = last;
4983 if (__kmp_display_affinity && place != th->th.th_current_place &&
4984 team->t.t_display_affinity != 1) {
4985 team->t.t_display_affinity = 1;
          KA_TRACE(100,
                   ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
                    "partition = [%d,%d], spacing = %.4f\n",
                    __kmp_gtid_from_thread(team->t.t_threads[f]),
                    team->t.t_id, f, th->th.th_new_place,
                    th->th.th_first_place, th->th.th_last_place, spacing));
4996 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4998 int S, rem, gap, s_count;
4999 S = n_th / n_places;
5001 rem = n_th - (S * n_places);
5002 gap = rem > 0 ? n_places / rem : n_places;
5003 int place = masters_place;
      thidx = n_th;
      if (update_master_only == 1)
        thidx = 1;
      for (f = 0; f < thidx; f++) {
5009 kmp_info_t *th = team->t.t_threads[f];
5010 KMP_DEBUG_ASSERT(th != NULL);
5012 th->th.th_first_place = place;
5013 th->th.th_last_place = place;
5014 th->th.th_new_place = place;
5015 if (__kmp_display_affinity && place != th->th.th_current_place &&
5016 team->t.t_display_affinity != 1) {
5017 team->t.t_display_affinity = 1;
5021 if ((s_count == S) && rem && (gap_ct == gap)) {
5023 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5025 if (place == last_place) {
5026 place = first_place;
5027 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
5035 }
else if (s_count == S) {
5036 if (place == last_place) {
5037 place = first_place;
5038 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
5047 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5048 "partition = [%d,%d]\n",
5049 __kmp_gtid_from_thread(team->t.t_threads[f]),
5050 team->t.t_id, f, th->th.th_new_place,
5051 th->th.th_first_place, th->th.th_last_place));
5053 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5061 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
// Allocate a new team data structure to use; take one off of the free pool if
// available.
kmp_team_t *
__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data,
#endif
                    kmp_proc_bind_t new_proc_bind,
                    kmp_internal_control_t *new_icvs,
                    int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
  int f;
  kmp_team_t *team;
  int use_hot_team = !root->r.r_active;
  int level = 0;
  int do_place_partition = 1;

  KA_TRACE(20, ("__kmp_allocate_team: called\n"));
5084 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5085 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5088 #if KMP_NESTED_HOT_TEAMS
5089 kmp_hot_team_ptr_t *hot_teams;
5091 team = master->th.th_team;
5092 level = team->t.t_active_level;
5093 if (master->th.th_teams_microtask) {
5094 if (master->th.th_teams_size.nteams > 1 &&
5097 (microtask_t)__kmp_teams_master ||
5098 master->th.th_teams_level <
5105 if ((master->th.th_teams_size.nteams == 1 &&
5106 master->th.th_teams_level >= team->t.t_level) ||
5107 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5108 do_place_partition = 0;
5110 hot_teams = master->th.th_hot_teams;
5111 if (level < __kmp_hot_teams_max_level && hot_teams &&
5112 hot_teams[level].hot_team) {
5120 KMP_DEBUG_ASSERT(new_nproc == 1);
5124 if (use_hot_team && new_nproc > 1) {
5125 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5126 #if KMP_NESTED_HOT_TEAMS
5127 team = hot_teams[level].hot_team;
5129 team = root->r.r_hot_team;
5132 if (__kmp_tasking_mode != tskm_immediate_exec) {
5133 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5134 "task_team[1] = %p before reinit\n",
5135 team->t.t_task_team[0], team->t.t_task_team[1]));
5139 if (team->t.t_nproc != new_nproc &&
5140 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5142 int old_nthr = team->t.t_nproc;
5143 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5148 if (do_place_partition == 0)
5149 team->t.t_proc_bind = proc_bind_default;
5153 if (team->t.t_nproc == new_nproc) {
5154 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5157 if (team->t.t_size_changed == -1) {
5158 team->t.t_size_changed = 1;
5160 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5164 kmp_r_sched_t new_sched = new_icvs->sched;
5166 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5168 __kmp_reinitialize_team(team, new_icvs,
5169 root->r.r_uber_thread->th.th_ident);
5171 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5172 team->t.t_threads[0], team));
5173 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5175 #if KMP_AFFINITY_SUPPORTED
5176 if ((team->t.t_size_changed == 0) &&
5177 (team->t.t_proc_bind == new_proc_bind)) {
5178 if (new_proc_bind == proc_bind_spread) {
5179 if (do_place_partition) {
5181 __kmp_partition_places(team, 1);
5184 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5185 "proc_bind = %d, partition = [%d,%d]\n",
5186 team->t.t_id, new_proc_bind, team->t.t_first_place,
5187 team->t.t_last_place));
5189 if (do_place_partition) {
5190 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5191 __kmp_partition_places(team);
5195 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
    } else if (team->t.t_nproc > new_nproc) {
      KA_TRACE(20,
               ("__kmp_allocate_team: decreasing hot team thread count to %d\n",
                new_nproc));

      team->t.t_size_changed = 1;
5203 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5206 __kmp_add_threads_to_team(team, new_nproc);
5208 #if KMP_NESTED_HOT_TEAMS
5209 if (__kmp_hot_teams_mode == 0) {
5212 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5213 hot_teams[level].hot_team_nth = new_nproc;
5216 for (f = new_nproc; f < team->t.t_nproc; f++) {
5217 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5218 if (__kmp_tasking_mode != tskm_immediate_exec) {
5221 team->t.t_threads[f]->th.th_task_team = NULL;
5223 __kmp_free_thread(team->t.t_threads[f]);
5224 team->t.t_threads[f] = NULL;
5226 #if KMP_NESTED_HOT_TEAMS
5231 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5232 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5233 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5234 for (
int b = 0; b < bs_last_barrier; ++b) {
5235 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5236 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5238 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5243 team->t.t_nproc = new_nproc;
5245 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5246 __kmp_reinitialize_team(team, new_icvs,
5247 root->r.r_uber_thread->th.th_ident);
5250 for (f = 0; f < new_nproc; ++f) {
5251 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5256 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5257 team->t.t_threads[0], team));
5259 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5262 for (f = 0; f < team->t.t_nproc; f++) {
5263 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5264 team->t.t_threads[f]->th.th_team_nproc ==
5269 if (do_place_partition) {
5270 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5271 #if KMP_AFFINITY_SUPPORTED
5272 __kmp_partition_places(team);
5276 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5277 kmp_affin_mask_t *old_mask;
      if (KMP_AFFINITY_CAPABLE()) {
        KMP_CPU_ALLOC(old_mask);
      }
#endif
      KA_TRACE(20,
               ("__kmp_allocate_team: increasing hot team thread count to %d\n",
                new_nproc));
      // Save the old thread count so only the newly added threads are
      // (re)initialized below.
      int old_nproc = team->t.t_nproc;
5287 team->t.t_size_changed = 1;
5289 #if KMP_NESTED_HOT_TEAMS
5290 int avail_threads = hot_teams[level].hot_team_nth;
5291 if (new_nproc < avail_threads)
5292 avail_threads = new_nproc;
5293 kmp_info_t **other_threads = team->t.t_threads;
5294 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5298 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5299 for (b = 0; b < bs_last_barrier; ++b) {
5300 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5301 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5303 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5307 if (hot_teams[level].hot_team_nth >= new_nproc) {
5310 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5311 team->t.t_nproc = new_nproc;
5315 team->t.t_nproc = hot_teams[level].hot_team_nth;
5316 hot_teams[level].hot_team_nth = new_nproc;
5318 if (team->t.t_max_nproc < new_nproc) {
5320 __kmp_reallocate_team_arrays(team, new_nproc);
5321 __kmp_reinitialize_team(team, new_icvs, NULL);
5324 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5330 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5334 for (f = team->t.t_nproc; f < new_nproc; f++) {
5335 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5336 KMP_DEBUG_ASSERT(new_worker);
5337 team->t.t_threads[f] = new_worker;
        KA_TRACE(20,
                 ("__kmp_allocate_team: team %d init T#%d arrived: "
                  "join=%llu, plain=%llu\n",
                  team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
                  team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                  team->t.t_bar[bs_plain_barrier].b_arrived));
5348 kmp_balign_t *balign = new_worker->th.th_bar;
5349 for (b = 0; b < bs_last_barrier; ++b) {
5350 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5351 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5352 KMP_BARRIER_PARENT_FLAG);
5354 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5360 #if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5361 if (KMP_AFFINITY_CAPABLE()) {
5363 __kmp_set_system_affinity(old_mask, TRUE);
5364 KMP_CPU_FREE(old_mask);
5367 #if KMP_NESTED_HOT_TEAMS
5370 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5373 __kmp_add_threads_to_team(team, new_nproc);
5377 __kmp_initialize_team(team, new_nproc, new_icvs,
5378 root->r.r_uber_thread->th.th_ident);
5381 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5382 for (f = 0; f < team->t.t_nproc; ++f)
5383 __kmp_initialize_info(team->t.t_threads[f], team, f,
5384 __kmp_gtid_from_tid(f, team));
5392 for (f = old_nproc; f < team->t.t_nproc; ++f)
5393 team->t.t_threads[f]->th.th_task_state =
5394 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5397 kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
5398 for (f = old_nproc; f < team->t.t_nproc; ++f)
5399 team->t.t_threads[f]->th.th_task_state = old_state;
5403 for (f = 0; f < team->t.t_nproc; ++f) {
5404 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5405 team->t.t_threads[f]->th.th_team_nproc ==
5410 if (do_place_partition) {
5411 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5412 #if KMP_AFFINITY_SUPPORTED
5413 __kmp_partition_places(team);
5418 kmp_info_t *master = team->t.t_threads[0];
5419 if (master->th.th_teams_microtask) {
5420 for (f = 1; f < new_nproc; ++f) {
5422 kmp_info_t *thr = team->t.t_threads[f];
5423 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5424 thr->th.th_teams_level = master->th.th_teams_level;
5425 thr->th.th_teams_size = master->th.th_teams_size;
5428 #if KMP_NESTED_HOT_TEAMS
5432 for (f = 1; f < new_nproc; ++f) {
5433 kmp_info_t *thr = team->t.t_threads[f];
5435 kmp_balign_t *balign = thr->th.th_bar;
5436 for (b = 0; b < bs_last_barrier; ++b) {
5437 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5438 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5440 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5448 __kmp_alloc_argv_entries(argc, team, TRUE);
5449 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5453 KF_TRACE(10, (
" hot_team = %p\n", team));
5456 if (__kmp_tasking_mode != tskm_immediate_exec) {
5457 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5458 "task_team[1] = %p after reinit\n",
5459 team->t.t_task_team[0], team->t.t_task_team[1]));
5464 __ompt_team_assign_id(team, ompt_parallel_data);
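    // End of the hot-team fast path: the reused (and possibly resized and
    // re-bound) hot team is returned to the caller here. When no hot team can
    // be used, the code below first scans __kmp_team_pool for a recycled team
    // with sufficient t_max_nproc and only then allocates a brand-new team.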
5474 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5477 if (team->t.t_max_nproc >= max_nproc) {
5479 __kmp_team_pool = team->t.t_next_pool;
5481 if (max_nproc > 1 &&
5482 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5484 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5489 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5491 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5492 "task_team[1] %p to NULL\n",
5493 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5494 team->t.t_task_team[0] = NULL;
5495 team->t.t_task_team[1] = NULL;
5498 __kmp_alloc_argv_entries(argc, team, TRUE);
5499 KMP_CHECK_UPDATE(team->t.t_argc, argc);
      KA_TRACE(
          20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
               team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5506 for (b = 0; b < bs_last_barrier; ++b) {
5507 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5509 team->t.t_bar[b].b_master_arrived = 0;
5510 team->t.t_bar[b].b_team_arrived = 0;
5515 team->t.t_proc_bind = new_proc_bind;
5517 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5521 __ompt_team_assign_id(team, ompt_parallel_data);
5533 team = __kmp_reap_team(team);
5534 __kmp_team_pool = team;
5539 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5542 team->t.t_max_nproc = max_nproc;
5543 if (max_nproc > 1 &&
5544 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5546 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5551 __kmp_allocate_team_arrays(team, max_nproc);
5553 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5554 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5556 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5558 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5559 team->t.t_task_team[0] = NULL;
5561 team->t.t_task_team[1] = NULL;
5564 if (__kmp_storage_map) {
5565 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5569 __kmp_alloc_argv_entries(argc, team, FALSE);
5570 team->t.t_argc = argc;
5573 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5574 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5577 for (b = 0; b < bs_last_barrier; ++b) {
5578 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5580 team->t.t_bar[b].b_master_arrived = 0;
5581 team->t.t_bar[b].b_team_arrived = 0;
5586 team->t.t_proc_bind = new_proc_bind;
5589 __ompt_team_assign_id(team, ompt_parallel_data);
5590 team->t.ompt_serialized_team_info = NULL;
  KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                team->t.t_id));

  return team;
}
5606 void __kmp_free_team(kmp_root_t *root,
5607 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5609 KA_TRACE(20, (
"__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5613 KMP_DEBUG_ASSERT(root);
5614 KMP_DEBUG_ASSERT(team);
5615 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5616 KMP_DEBUG_ASSERT(team->t.t_threads);
5618 int use_hot_team = team == root->r.r_hot_team;
5619 #if KMP_NESTED_HOT_TEAMS
5622 level = team->t.t_active_level - 1;
5623 if (master->th.th_teams_microtask) {
5624 if (master->th.th_teams_size.nteams > 1) {
5628 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5629 master->th.th_teams_level == team->t.t_level) {
5635 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5637 if (level < __kmp_hot_teams_max_level) {
5638 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
  TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5648 team->t.t_copyin_counter = 0;
5653 if (!use_hot_team) {
5654 if (__kmp_tasking_mode != tskm_immediate_exec) {
5656 for (f = 1; f < team->t.t_nproc; ++f) {
5657 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5658 kmp_info_t *th = team->t.t_threads[f];
5659 volatile kmp_uint32 *state = &th->th.th_reap_state;
5660 while (*state != KMP_SAFE_TO_REAP) {
5664 if (!__kmp_is_thread_alive(th, &ecode)) {
5665 *state = KMP_SAFE_TO_REAP;
5670 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5671 if (fl.is_sleeping())
5672 fl.resume(__kmp_gtid_from_thread(th));
5679 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5680 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5681 if (task_team != NULL) {
5682 for (f = 0; f < team->t.t_nproc; ++f) {
5683 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5684 team->t.t_threads[f]->th.th_task_team = NULL;
          KA_TRACE(
              20,
              ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
               __kmp_get_gtid(), task_team, team->t.t_id));
5690 #if KMP_NESTED_HOT_TEAMS
5691 __kmp_free_task_team(master, task_team);
5693 team->t.t_task_team[tt_idx] = NULL;
5699 team->t.t_parent = NULL;
5700 team->t.t_level = 0;
5701 team->t.t_active_level = 0;
5704 for (f = 1; f < team->t.t_nproc; ++f) {
5705 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5706 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5707 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5710 __kmp_free_thread(team->t.t_threads[f]);
5713 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5716 team->t.b->go_release();
5717 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5718 for (f = 1; f < team->t.t_nproc; ++f) {
5719 if (team->t.b->sleep[f].sleep) {
5720 __kmp_atomic_resume_64(
5721 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5722 (kmp_atomic_flag_64<> *)NULL);
5727 for (
int f = 1; f < team->t.t_nproc; ++f) {
5728 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5734 for (f = 1; f < team->t.t_nproc; ++f) {
5735 team->t.t_threads[f] = NULL;
5738 if (team->t.t_max_nproc > 1 &&
5739 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5740 distributedBarrier::deallocate(team->t.b);
5745 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5746 __kmp_team_pool = (
volatile kmp_team_t *)team;
5749 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5750 team->t.t_threads[1]->th.th_cg_roots);
5751 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5753 for (f = 1; f < team->t.t_nproc; ++f) {
5754 kmp_info_t *thr = team->t.t_threads[f];
5755 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5756 thr->th.th_cg_roots->cg_root == thr);
5758 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5759 thr->th.th_cg_roots = tmp->up;
5760 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5761 " up to node %p. cg_nthreads was %d\n",
5762 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5763 int i = tmp->cg_nthreads--;
5768 if (thr->th.th_cg_roots)
5769 thr->th.th_current_task->td_icvs.thread_limit =
5770 thr->th.th_cg_roots->cg_thread_limit;
5779 kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
5780 kmp_team_t *next_pool = team->t.t_next_pool;
5782 KMP_DEBUG_ASSERT(team);
5783 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5784 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5785 KMP_DEBUG_ASSERT(team->t.t_threads);
5786 KMP_DEBUG_ASSERT(team->t.t_argv);
5791 __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  return next_pool;
}
5825 void __kmp_free_thread(kmp_info_t *this_th) {
5829 KA_TRACE(20, (
"__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5830 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5832 KMP_DEBUG_ASSERT(this_th);
5837 kmp_balign_t *balign = this_th->th.th_bar;
5838 for (b = 0; b < bs_last_barrier; ++b) {
5839 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5840 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5841 balign[b].bb.team = NULL;
5842 balign[b].bb.leaf_kids = 0;
5844 this_th->th.th_task_state = 0;
5845 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5848 TCW_PTR(this_th->th.th_team, NULL);
5849 TCW_PTR(this_th->th.th_root, NULL);
5850 TCW_PTR(this_th->th.th_dispatch, NULL);
5852 while (this_th->th.th_cg_roots) {
5853 this_th->th.th_cg_roots->cg_nthreads--;
5854 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5855 " %p of thread %p to %d\n",
5856 this_th, this_th->th.th_cg_roots,
5857 this_th->th.th_cg_roots->cg_root,
5858 this_th->th.th_cg_roots->cg_nthreads));
5859 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5860 if (tmp->cg_root == this_th) {
5861 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
      KA_TRACE(
          5, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5864 this_th->th.th_cg_roots = tmp->up;
5867 if (tmp->cg_nthreads == 0) {
5870 this_th->th.th_cg_roots = NULL;
5880 __kmp_free_implicit_task(this_th);
5881 this_th->th.th_current_task = NULL;
5885 gtid = this_th->th.th_info.ds.ds_gtid;
5886 if (__kmp_thread_pool_insert_pt != NULL) {
5887 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5888 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5889 __kmp_thread_pool_insert_pt = NULL;
5898 if (__kmp_thread_pool_insert_pt != NULL) {
5899 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5901 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5903 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5904 scan = &((*scan)->th.th_next_pool))
5909 TCW_PTR(this_th->th.th_next_pool, *scan);
5910 __kmp_thread_pool_insert_pt = *scan = this_th;
5911 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5912 (this_th->th.th_info.ds.ds_gtid <
5913 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5914 TCW_4(this_th->th.th_in_pool, TRUE);
5915 __kmp_suspend_initialize_thread(this_th);
5916 __kmp_lock_suspend_mx(this_th);
5917 if (this_th->th.th_active == TRUE) {
5918 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5919 this_th->th.th_active_in_pool = TRUE;
5923 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5926 __kmp_unlock_suspend_mx(this_th);
5928 TCW_4(__kmp_nth, __kmp_nth - 1);
5930 #ifdef KMP_ADJUST_BLOCKTIME
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */
}
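// Freed threads are kept in __kmp_thread_pool sorted by ascending gtid, and
// __kmp_thread_pool_insert_pt caches the last insertion point so repeated
// frees do not rescan the list from the head. Illustrative example (made-up
// gtids): freeing T#7 into a pool holding T#3, T#5, T#9 links it between T#5
// and T#9 and leaves the insert point at T#7.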
void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMP_PROFILING_SUPPORT
  ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
  if (ProfileTraceFile)
    llvm::timeTraceProfilerInitialize(500, "libomptarget");
#endif

  int gtid = this_thr->th.th_info.ds.ds_gtid;
  kmp_team_t **volatile pteam;

  KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));
5961 if (__kmp_env_consistency_check) {
5962 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5966 if (ompd_state & OMPD_ENABLE_BP)
5967 ompd_bp_thread_begin();
5971 ompt_data_t *thread_data =
nullptr;
5972 if (ompt_enabled.enabled) {
5973 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5974 *thread_data = ompt_data_none;
5976 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5977 this_thr->th.ompt_thread_info.wait_id = 0;
5978 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5979 this_thr->th.ompt_thread_info.parallel_flags = 0;
5980 if (ompt_enabled.ompt_callback_thread_begin) {
5981 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5982 ompt_thread_worker, thread_data);
5984 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5989 while (!TCR_4(__kmp_global.g.g_done)) {
5990 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5994 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
5997 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6000 if (ompt_enabled.enabled) {
6001 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6005 pteam = &this_thr->th.th_team;
6008 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6010 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
        KA_TRACE(20,
                 ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
                  gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
                  (*pteam)->t.t_pkfn));
6017 updateHWFPControl(*pteam);
6020 if (ompt_enabled.enabled) {
6021 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6025 rc = (*pteam)->t.t_invoke(gtid);
6029 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6030 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6031 (*pteam)->t.t_pkfn));
6034 if (ompt_enabled.enabled) {
6036 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6038 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6042 __kmp_join_barrier(gtid);
6045 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
6048 if (ompd_state & OMPD_ENABLE_BP)
6049 ompd_bp_thread_end();
6053 if (ompt_enabled.ompt_callback_thread_end) {
6054 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6058 this_thr->th.th_task_team = NULL;
6060 __kmp_common_destroy_gtid(gtid);
  KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));

#if OMP_PROFILING_SUPPORT
  llvm::timeTraceProfilerFinishThread();
#endif
  return this_thr;
}
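// Worker lifecycle implemented above: each worker loops until g_done is set,
// sleeping in the fork barrier until the primary thread releases it with a
// team and microtask, invoking (*pteam)->t.t_invoke(gtid), and then entering
// the join barrier before going back to wait for the next parallel region.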
void __kmp_internal_end_dest(void *specific_gtid) {
  int gtid;
  // Make sure no significant bits are lost.
  __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, &gtid);

  KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));
6082 __kmp_internal_end_thread(gtid);
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
__attribute__((destructor)) void __kmp_internal_end_dtor(void) {
  __kmp_internal_end_atexit();
}
#endif

void __kmp_internal_end_atexit(void) {
  KA_TRACE(30, ("__kmp_internal_end_atexit\n"));
6120 __kmp_internal_end_library(-1);
6122 __kmp_close_console();
static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {
6131 KMP_DEBUG_ASSERT(thread != NULL);
6133 gtid = thread->th.th_info.ds.ds_gtid;
6136 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    KA_TRACE(
        20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
             gtid));
6141 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6143 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6145 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
6149 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6151 __kmp_release_64(&flag);
6156 __kmp_reap_worker(thread);
6168 if (thread->th.th_active_in_pool) {
6169 thread->th.th_active_in_pool = FALSE;
6170 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6171 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6175 __kmp_free_implicit_task(thread);
6179 __kmp_free_fast_memory(thread);
6182 __kmp_suspend_uninitialize_thread(thread);
6184 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6185 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6190 #ifdef KMP_ADJUST_BLOCKTIME
6193 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6194 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6195 if (__kmp_nth <= __kmp_avail_proc) {
6196 __kmp_zero_bt = FALSE;
6202 if (__kmp_env_consistency_check) {
6203 if (thread->th.th_cons) {
6204 __kmp_free_cons_stack(thread->th.th_cons);
6205 thread->th.th_cons = NULL;
6209 if (thread->th.th_pri_common != NULL) {
6210 __kmp_free(thread->th.th_pri_common);
6211 thread->th.th_pri_common = NULL;
6214 if (thread->th.th_task_state_memo_stack != NULL) {
6215 __kmp_free(thread->th.th_task_state_memo_stack);
6216 thread->th.th_task_state_memo_stack = NULL;
6220 if (thread->th.th_local.bget_data != NULL) {
6221 __kmp_finalize_bget(thread);
6225 #if KMP_AFFINITY_SUPPORTED
6226 if (thread->th.th_affin_mask != NULL) {
6227 KMP_CPU_FREE(thread->th.th_affin_mask);
6228 thread->th.th_affin_mask = NULL;
6232 #if KMP_USE_HIER_SCHED
6233 if (thread->th.th_hier_bar_data != NULL) {
6234 __kmp_free(thread->th.th_hier_bar_data);
6235 thread->th.th_hier_bar_data = NULL;
6239 __kmp_reap_team(thread->th.th_serial_team);
6240 thread->th.th_serial_team = NULL;
static void __kmp_itthash_clean(kmp_info_t *th) {
#if USE_ITT_NOTIFY
  if (__kmp_itt_region_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
  if (__kmp_itt_barrier_domains.count > 0) {
    for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
      kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
      while (bucket) {
        kmp_itthash_entry_t *next = bucket->next_in_bucket;
        __kmp_thread_free(th, bucket);
        bucket = next;
      }
    }
  }
#endif
}
static void __kmp_internal_end(void) {
  int i;
6276 __kmp_unregister_library();
6283 __kmp_reclaim_dead_roots();
6287 for (i = 0; i < __kmp_threads_capacity; i++)
6289 if (__kmp_root[i]->r.r_active)
6292 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6294 if (i < __kmp_threads_capacity) {
6306 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6307 if (TCR_4(__kmp_init_monitor)) {
6308 __kmp_reap_monitor(&__kmp_monitor);
6309 TCW_4(__kmp_init_monitor, 0);
6311 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6312 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6318 for (i = 0; i < __kmp_threads_capacity; i++) {
6319 if (__kmp_root[i]) {
6322 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6331 while (__kmp_thread_pool != NULL) {
6333 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6334 __kmp_thread_pool = thread->th.th_next_pool;
6336 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6337 thread->th.th_next_pool = NULL;
6338 thread->th.th_in_pool = FALSE;
6339 __kmp_reap_thread(thread, 0);
6341 __kmp_thread_pool_insert_pt = NULL;
6344 while (__kmp_team_pool != NULL) {
6346 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6347 __kmp_team_pool = team->t.t_next_pool;
6349 team->t.t_next_pool = NULL;
6350 __kmp_reap_team(team);
6353 __kmp_reap_task_teams();
6360 for (i = 0; i < __kmp_threads_capacity; i++) {
6361 kmp_info_t *thr = __kmp_threads[i];
6362 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6367 for (i = 0; i < __kmp_threads_capacity; ++i) {
6374 TCW_SYNC_4(__kmp_init_common, FALSE);
6376 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6384 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6385 if (TCR_4(__kmp_init_monitor)) {
6386 __kmp_reap_monitor(&__kmp_monitor);
6387 TCW_4(__kmp_init_monitor, 0);
6389 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6390 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6393 TCW_4(__kmp_init_gtid, FALSE);
void __kmp_internal_end_library(int gtid_req) {
6409 if (__kmp_global.g.g_abort) {
6410 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6414 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6415 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6420 if (TCR_4(__kmp_init_hidden_helper) &&
6421 !TCR_4(__kmp_hidden_helper_team_done)) {
6422 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6424 __kmp_hidden_helper_main_thread_release();
6426 __kmp_hidden_helper_threads_deinitz_wait();
6432 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(
        10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6435 if (gtid == KMP_GTID_SHUTDOWN) {
6436 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6437 "already shutdown\n"));
6439 }
else if (gtid == KMP_GTID_MONITOR) {
6440 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6441 "registered, or system shutdown\n"));
6443 }
else if (gtid == KMP_GTID_DNE) {
6444 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6447 }
else if (KMP_UBER_GTID(gtid)) {
6449 if (__kmp_root[gtid]->r.r_active) {
6450 __kmp_global.g.g_abort = -1;
6451 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
      __kmp_unregister_library();
      KA_TRACE(10,
               ("__kmp_internal_end_library: root still active, abort T#%d\n",
                gtid));
      return;
    } else {
      __kmp_itthash_clean(__kmp_threads[gtid]);
      KA_TRACE(
          10,
          ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
      __kmp_unregister_root_current_thread(gtid);
    }
6469 #ifdef DUMP_DEBUG_ON_EXIT
6470 if (__kmp_debug_buf)
6471 __kmp_dump_debug_buffer();
6476 __kmp_unregister_library();
6481 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6484 if (__kmp_global.g.g_abort) {
6485 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6487 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6490 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6491 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6500 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6503 __kmp_internal_end();
6505 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6506 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6508 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6510 #ifdef DUMP_DEBUG_ON_EXIT
6511 if (__kmp_debug_buf)
6512 __kmp_dump_debug_buffer();
6516 __kmp_close_console();
6519 __kmp_fini_allocator();
void __kmp_internal_end_thread(int gtid_req) {
6532 if (__kmp_global.g.g_abort) {
6533 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6537 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6538 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6543 if (TCR_4(__kmp_init_hidden_helper) &&
6544 !TCR_4(__kmp_hidden_helper_team_done)) {
6545 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6547 __kmp_hidden_helper_main_thread_release();
6549 __kmp_hidden_helper_threads_deinitz_wait();
6556 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
    KA_TRACE(10,
             ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6559 if (gtid == KMP_GTID_SHUTDOWN) {
6560 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6561 "already shutdown\n"));
6563 }
else if (gtid == KMP_GTID_MONITOR) {
6564 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6565 "registered, or system shutdown\n"));
6567 }
else if (gtid == KMP_GTID_DNE) {
6568 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6572 }
else if (KMP_UBER_GTID(gtid)) {
6574 if (__kmp_root[gtid]->r.r_active) {
6575 __kmp_global.g.g_abort = -1;
6576 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
      KA_TRACE(10,
               ("__kmp_internal_end_thread: root still active, abort T#%d\n",
                gtid));
      return;
    } else {
      KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",
                    gtid));
      __kmp_unregister_root_current_thread(gtid);
    }
6588 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6591 __kmp_threads[gtid]->th.th_task_team = NULL;
    KA_TRACE(10,
             ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
              gtid));
    return;
  }
6601 if (__kmp_pause_status != kmp_hard_paused)
6605 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6610 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6613 if (__kmp_global.g.g_abort) {
6614 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6616 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6619 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6620 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6631 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6633 for (i = 0; i < __kmp_threads_capacity; ++i) {
6634 if (KMP_UBER_GTID(i)) {
      KA_TRACE(
          10,
          ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6638 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6639 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6646 __kmp_internal_end();
6648 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6649 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6651 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6653 #ifdef DUMP_DEBUG_ON_EXIT
6654 if (__kmp_debug_buf)
6655 __kmp_dump_debug_buffer();
6662 static long __kmp_registration_flag = 0;
6664 static char *__kmp_registration_str = NULL;
static inline char *__kmp_reg_status_name() {
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
} // __kmp_reg_status_name
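// Library registration sketch: the value stored under the name returned by
// __kmp_reg_status_name() (an environment variable, or a POSIX shared-memory
// segment when KMP_USE_SHM is defined) is formatted as "%p-%lx-%s", i.e.
// <address of __kmp_registration_flag>-<flag value>-<library file>. A second
// copy of the runtime loaded into the same process reads it back, checks
// whether that address is mapped and still holds the flag value, and if so
// aborts with a DuplicateLibrary error unless KMP_DUPLICATE_LIB_OK is set to
// a true value.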
6681 void __kmp_register_library_startup(
void) {
6683 char *name = __kmp_reg_status_name();
6689 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6690 __kmp_initialize_system_tick();
6692 __kmp_read_system_time(&time.dtime);
6693 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6694 __kmp_registration_str =
6695 __kmp_str_format(
"%p-%lx-%s", &__kmp_registration_flag,
6696 __kmp_registration_flag, KMP_LIBRARY_FILE);
6698 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6699 __kmp_registration_str));
6705 #if defined(KMP_USE_SHM)
6706 char *shm_name = __kmp_str_format(
"/%s", name);
6707 int shm_preexist = 0;
6709 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6710 if ((fd1 == -1) && (errno == EEXIST)) {
6713 fd1 = shm_open(shm_name, O_RDWR, 0666);
6716 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open SHM"), KMP_ERR(0),
6722 }
else if (fd1 == -1) {
6725 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open SHM2"), KMP_ERR(errno),
6728 if (shm_preexist == 0) {
6730 if (ftruncate(fd1, SHM_SIZE) == -1) {
6732 __kmp_fatal(KMP_MSG(FunctionError,
"Can't set size of SHM"),
6733 KMP_ERR(errno), __kmp_msg_null);
    char *data1 =
        (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
6738 if (data1 == MAP_FAILED) {
6740 __kmp_fatal(KMP_MSG(FunctionError,
"Can't map SHM"), KMP_ERR(errno),
6743 if (shm_preexist == 0) {
6744 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6747 value = __kmp_str_format(
"%s", data1);
6748 munmap(data1, SHM_SIZE);
6752 __kmp_env_set(name, __kmp_registration_str, 0);
6754 value = __kmp_env_get(name);
6757 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6764 char *flag_addr_str = NULL;
6765 char *flag_val_str = NULL;
6766 char const *file_name = NULL;
6767 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6768 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6771 unsigned long *flag_addr = 0;
6772 unsigned long flag_val = 0;
6773 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void **, &flag_addr));
6774 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6775 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6779 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6793 file_name =
"unknown library";
6798 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6799 if (!__kmp_str_match_true(duplicate_ok)) {
6801 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6802 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6804 KMP_INTERNAL_FREE(duplicate_ok);
6805 __kmp_duplicate_library_ok = 1;
6810 #if defined(KMP_USE_SHM)
6812 shm_unlink(shm_name);
6815 __kmp_env_unset(name);
6819 KMP_DEBUG_ASSERT(0);
6823 KMP_INTERNAL_FREE((
void *)value);
6824 #if defined(KMP_USE_SHM)
6825 KMP_INTERNAL_FREE((
void *)shm_name);
6828 KMP_INTERNAL_FREE((
void *)name);
6832 void __kmp_unregister_library(
void) {
6834 char *name = __kmp_reg_status_name();
6837 #if defined(KMP_USE_SHM)
6838 char *shm_name = __kmp_str_format(
"/%s", name);
6839 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
6844 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6845 if (data1 != MAP_FAILED) {
6846 value = __kmp_str_format(
"%s", data1);
6847 munmap(data1, SHM_SIZE);
6851 value = __kmp_env_get(name);
6854 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6855 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6856 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6858 #if defined(KMP_USE_SHM)
6859 shm_unlink(shm_name);
6861 __kmp_env_unset(name);
6865 #if defined(KMP_USE_SHM)
6866 KMP_INTERNAL_FREE(shm_name);
6869 KMP_INTERNAL_FREE(__kmp_registration_str);
6870 KMP_INTERNAL_FREE(value);
6871 KMP_INTERNAL_FREE(name);
6873 __kmp_registration_flag = 0;
6874 __kmp_registration_str = NULL;
6881 #if KMP_MIC_SUPPORTED
6883 static void __kmp_check_mic_type() {
6884 kmp_cpuid_t cpuid_state = {0};
6885 kmp_cpuid_t *cs_p = &cpuid_state;
6886 __kmp_x86_cpuid(1, 0, cs_p);
6888 if ((cs_p->eax & 0xff0) == 0xB10) {
6889 __kmp_mic_type = mic2;
6890 }
else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
6891 __kmp_mic_type = mic3;
6893 __kmp_mic_type = non_mic;
6900 static void __kmp_user_level_mwait_init() {
6901 struct kmp_cpuid buf;
6902 __kmp_x86_cpuid(7, 0, &buf);
6903 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6904 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6905 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6906 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
6907 __kmp_umwait_enabled));
6909 #elif KMP_HAVE_MWAIT
6910 #ifndef AT_INTELPHIUSERMWAIT
6913 #define AT_INTELPHIUSERMWAIT 10000
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }
6921 static void __kmp_user_level_mwait_init() {
6926 if (__kmp_mic_type == mic3) {
6927 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
6928 if ((res & 0x1) || __kmp_user_level_mwait) {
6929 __kmp_mwait_enabled = TRUE;
6930 if (__kmp_user_level_mwait) {
6931 KMP_INFORM(EnvMwaitWarn);
6934 __kmp_mwait_enabled = FALSE;
6937 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
6938 "__kmp_mwait_enabled = %d\n",
6939 __kmp_mic_type, __kmp_mwait_enabled));
6943 static void __kmp_do_serial_initialize(
void) {
6947 KA_TRACE(10, (
"__kmp_do_serial_initialize: enter\n"));
  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6963 __kmp_validate_locks();
6966 __kmp_init_allocator();
6971 __kmp_register_library_startup();
6974 if (TCR_4(__kmp_global.g.g_done)) {
6975 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
6978 __kmp_global.g.g_abort = 0;
6979 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6982 #if KMP_USE_ADAPTIVE_LOCKS
6983 #if KMP_DEBUG_ADAPTIVE_LOCKS
6984 __kmp_init_speculative_stats();
6987 #if KMP_STATS_ENABLED
6990 __kmp_init_lock(&__kmp_global_lock);
6991 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6992 __kmp_init_lock(&__kmp_debug_lock);
6993 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6994 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6995 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6996 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6997 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6998 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6999 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7000 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7001 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7002 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7003 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7004 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7005 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7006 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7007 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7009 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7011 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7015 __kmp_runtime_initialize();
7017 #if KMP_MIC_SUPPORTED
7018 __kmp_check_mic_type();
7025 __kmp_abort_delay = 0;
7029 __kmp_dflt_team_nth_ub = __kmp_xproc;
7030 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7031 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7033 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7034 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7036 __kmp_max_nth = __kmp_sys_max_nth;
7037 __kmp_cg_max_nth = __kmp_sys_max_nth;
7038 __kmp_teams_max_nth = __kmp_xproc;
7039 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7040 __kmp_teams_max_nth = __kmp_sys_max_nth;
7045 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7047 __kmp_monitor_wakeups =
7048 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7049 __kmp_bt_intervals =
7050 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7053 __kmp_library = library_throughput;
7055 __kmp_static = kmp_sch_static_balanced;
7062 #if KMP_FAST_REDUCTION_BARRIER
7063 #define kmp_reduction_barrier_gather_bb ((int)1)
7064 #define kmp_reduction_barrier_release_bb ((int)1)
7065 #define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7066 #define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7068 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7069 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7070 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7071 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7072 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7073 #if KMP_FAST_REDUCTION_BARRIER
7074 if (i == bs_reduction_barrier) {
7076 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7077 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7078 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7079 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7083 #if KMP_FAST_REDUCTION_BARRIER
7084 #undef kmp_reduction_barrier_release_pat
7085 #undef kmp_reduction_barrier_gather_pat
7086 #undef kmp_reduction_barrier_release_bb
7087 #undef kmp_reduction_barrier_gather_bb
7089 #if KMP_MIC_SUPPORTED
7090 if (__kmp_mic_type == mic2) {
7092 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7093 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7095 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7096 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7098 #if KMP_FAST_REDUCTION_BARRIER
7099 if (__kmp_mic_type == mic2) {
7100 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7101 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7108 __kmp_env_checks = TRUE;
7110 __kmp_env_checks = FALSE;
7114 __kmp_foreign_tp = TRUE;
7116 __kmp_global.g.g_dynamic = FALSE;
7117 __kmp_global.g.g_dynamic_mode = dynamic_default;
7119 __kmp_init_nesting_mode();
7121 __kmp_env_initialize(NULL);
7123 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7124 __kmp_user_level_mwait_init();
7128 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
7129 if (__kmp_str_match_true(val)) {
7130 kmp_str_buf_t buffer;
7131 __kmp_str_buf_init(&buffer);
7132 __kmp_i18n_dump_catalog(&buffer);
7133 __kmp_printf(
"%s", buffer.str);
7134 __kmp_str_buf_free(&buffer);
7136 __kmp_env_free(&val);
7139 __kmp_threads_capacity =
7140 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7142 __kmp_tp_capacity = __kmp_default_tp_capacity(
7143 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7148 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7149 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7150 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7151 __kmp_thread_pool = NULL;
7152 __kmp_thread_pool_insert_pt = NULL;
7153 __kmp_team_pool = NULL;
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
7167 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7169 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7174 gtid = __kmp_register_root(TRUE);
7175 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7176 KMP_ASSERT(KMP_UBER_GTID(gtid));
7177 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7181 __kmp_common_initialize();
7185 __kmp_register_atfork();
7188 #if !KMP_DYNAMIC_LIB
    int rc = atexit(__kmp_internal_end_atexit);
    if (rc != 0) {
      __kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),
                  __kmp_msg_null);
    }
7200 #if KMP_HANDLE_SIGNALS
7206 __kmp_install_signals(FALSE);
7209 __kmp_install_signals(TRUE);
7214 __kmp_init_counter++;
7216 __kmp_init_serial = TRUE;
7218 if (__kmp_settings) {
7222 if (__kmp_display_env || __kmp_display_env_verbose) {
7223 __kmp_env_print_2();
7232 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
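// __kmp_serial_initialize (and the middle/parallel/hidden-helper variants
// below) all follow the same double-checked pattern: test the init flag, take
// __kmp_initz_lock, re-test under the lock, and only then run the matching
// __kmp_do_*_initialize body.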
7248 static void __kmp_do_middle_initialize(
void) {
7250 int prev_dflt_team_nth;
7252 if (!__kmp_init_serial) {
7253 __kmp_do_serial_initialize();
7256 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7260 prev_dflt_team_nth = __kmp_dflt_team_nth;
7262 #if KMP_AFFINITY_SUPPORTED
7265 __kmp_affinity_initialize();
7269 KMP_ASSERT(__kmp_xproc > 0);
7270 if (__kmp_avail_proc == 0) {
7271 __kmp_avail_proc = __kmp_xproc;
7277 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7278 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7283 if (__kmp_dflt_team_nth == 0) {
7284 #ifdef KMP_DFLT_NTH_CORES
7286 __kmp_dflt_team_nth = __kmp_ncores;
7287 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7288 "__kmp_ncores (%d)\n",
7289 __kmp_dflt_team_nth));
7292 __kmp_dflt_team_nth = __kmp_avail_proc;
7293 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7294 "__kmp_avail_proc(%d)\n",
7295 __kmp_dflt_team_nth));
7299 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7300 __kmp_dflt_team_nth = KMP_MIN_NTH;
7302 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7303 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7306 if (__kmp_nesting_mode > 0)
7307 __kmp_set_nesting_mode_threads();
7311 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7313 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7318 for (i = 0; i < __kmp_threads_capacity; i++) {
7319 kmp_info_t *thread = __kmp_threads[i];
7322 if (thread->th.th_current_task->td_icvs.nproc != 0)
7325 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
  KA_TRACE(20,
           ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
            __kmp_dflt_team_nth));
7333 #ifdef KMP_ADJUST_BLOCKTIME
7335 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7336 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7337 if (__kmp_nth > __kmp_avail_proc) {
7338 __kmp_zero_bt = TRUE;
7344 TCW_SYNC_4(__kmp_init_middle, TRUE);
7346 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
7349 void __kmp_middle_initialize(
void) {
7350 if (__kmp_init_middle) {
7353 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7354 if (__kmp_init_middle) {
7355 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7358 __kmp_do_middle_initialize();
7359 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7362 void __kmp_parallel_initialize(
void) {
7363 int gtid = __kmp_entry_gtid();
7366 if (TCR_4(__kmp_init_parallel))
7368 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7369 if (TCR_4(__kmp_init_parallel)) {
7370 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7375 if (TCR_4(__kmp_global.g.g_done)) {
    KA_TRACE(10,
             ("__kmp_parallel_initialize: attempt to init while shutting down\n"));
7379 __kmp_infinite_loop();
7385 if (!__kmp_init_middle) {
7386 __kmp_do_middle_initialize();
7388 __kmp_assign_root_init_mask();
7389 __kmp_resume_if_hard_paused();
7392 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7393 KMP_ASSERT(KMP_UBER_GTID(gtid));
7395 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
7398 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7399 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7400 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7404 #if KMP_HANDLE_SIGNALS
7406 __kmp_install_signals(TRUE);
7410 __kmp_suspend_initialize();
7412 #if defined(USE_LOAD_BALANCE)
7413 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7414 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7417 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7418 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7422 if (__kmp_version) {
7423 __kmp_print_version_2();
7427 TCW_SYNC_4(__kmp_init_parallel, TRUE);
  KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
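// Initialization is staged: serial init sets up locks, environment parsing and
// the __kmp_threads/__kmp_root arrays and registers the initial root; middle
// init adds affinity and the default team size; parallel init (above) captures
// x87/MXCSR control state, installs signal handlers and picks the dynamic
// adjustment mode before the first parallel region runs.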
7435 void __kmp_hidden_helper_initialize() {
7436 if (TCR_4(__kmp_init_hidden_helper))
7440 if (!TCR_4(__kmp_init_parallel))
7441 __kmp_parallel_initialize();
7445 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7446 if (TCR_4(__kmp_init_hidden_helper)) {
7447 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7452 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7456 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7459 __kmp_do_initialize_hidden_helper_threads();
7462 __kmp_hidden_helper_threads_initz_wait();
7465 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7467 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  // This needs to be visible to the microtask that is about to run.
  kmp_disp_t *dispatch;

  /* none of the threads have encountered any constructs, yet */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; /* reset doacross dispatch buffer counter */
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);
}
void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
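// Invokes the team's microtask (t_pkfn) as the implicit task of the calling
// worker: sets up dispatch state, notifies ITT/OMPT/stats as configured, runs
// the microtask via __kmp_invoke_microtask, and tears the bookkeeping down
// again. Returns the microtask's return code.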
int __kmp_invoke_task_func(int gtid) {
  int rc;
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // Inform ittnotify about entering user's code.
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_enter(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
#if INCLUDE_SSC_MARKS
  SSC_MARK_INVOKING();
#endif

#if OMPT_SUPPORT
  void *dummy;
  void **exit_frame_p;
  ompt_data_t *my_task_data;
  ompt_data_t *my_parallel_data;
  int ompt_team_size;

  if (ompt_enabled.enabled) {
    exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
                         .ompt_task_info.frame.exit_frame.ptr);
  } else {
    exit_frame_p = &dummy;
  }

  my_task_data =
      &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
  my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_team_size = team->t.t_nproc;
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
        __kmp_tid_from_gtid(gtid), ompt_task_implicit);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
  }
#endif

#if KMP_STATS_ENABLED
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
  }
  KMP_SET_THREAD_STATE(IMPLICIT_TASK);
#endif

  rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
                              tid, (int)team->t.t_argc, (void **)team->t.t_argv
#if OMPT_SUPPORT
                              ,
                              exit_frame_p
#endif
  );
#if OMPT_SUPPORT
  *exit_frame_p = NULL;
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
#endif

#if KMP_STATS_ENABLED
  // Restore the stats state only if we were in a teams construct.
  if (previous_state == stats_state_e::TEAMS_REGION) {
    KMP_SET_THREAD_STATE(previous_state);
  }
  KMP_POP_PARTITIONED_TIMER();
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    // Inform ittnotify about leaving user's code.
    if (team->t.t_stack_id != NULL) {
      __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
    } else {
      KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
      __kmp_itt_stack_callee_leave(
          (__itt_caller)team->t.t_parent->t.t_stack_id);
    }
  }
#endif /* USE_ITT_BUILD */
  __kmp_run_after_invoked_task(gtid, tid, this_thr, team);

  return rc;
}
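// Runs on each primary thread of the league created by a teams construct:
// sets the thread up as a new contention-group root, then forks the nested
// parallel that executes the teams microtask and joins it without a join
// barrier (the workers remain parked on the fork barrier).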
void __kmp_teams_master(int gtid) {
  // This routine is called by all primary threads in the teams construct.
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_team_t *team = thr->th.th_team;
  ident_t *loc = team->t.t_ident;
  thr->th.th_set_nproc = thr->th.th_teams_size.nth;
  KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
  KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
  KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
                __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));

  // This thread is a new CG root. Set up the proper variables.
  kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
  tmp->cg_root = thr; // make thr the CG root
  // Init to thread limit stored when teams masters were forked
  tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
  tmp->cg_nthreads = 1; // init counter to one active thread, this one
  KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
                 " cg_nthreads to 1\n",
                 thr, tmp));
  tmp->up = thr->th.th_cg_roots;
  thr->th.th_cg_roots = tmp;

// Launch the league of teams now, but do not let the workers execute
// (they hang on the fork barrier until the next parallel).
#if INCLUDE_SSC_MARKS
  SSC_MARK_FORKING();
#endif
  __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
                  (microtask_t)thr->th.th_teams_microtask, // "wrapped" task
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
#if INCLUDE_SSC_MARKS
  SSC_MARK_JOINING();
#endif
  // If the team size was reduced from the limit, set it to the new size.
  if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
    thr->th.th_teams_size.nth = thr->th.th_team_nproc;
  // The last argument (1) eliminates the join barrier, which would not work
  // because the workers are still waiting on the fork barrier.
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  ,
                  1);
}
int __kmp_invoke_teams_master(int gtid) {
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *team = this_thr->th.th_team;
  if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
    KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
                     (void *)__kmp_teams_master);
  __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
#if OMPT_SUPPORT
  int tid = __kmp_tid_from_gtid(gtid);
  ompt_data_t *task_data =
      &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
  ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
  if (ompt_enabled.ompt_callback_implicit_task) {
    ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
        ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
        ompt_task_initial);
    OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
  }
#endif
  __kmp_teams_master(gtid);
#if OMPT_SUPPORT
  this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
#endif
  __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
  return 1;
}
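/* Sets the requested number of threads for the next parallel region
   encountered by this thread. */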
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
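// Helper for the teams entry points below: derives the per-team thread limit
// from the num_threads/thread_limit clauses, KMP_TEAMS_THREAD_LIMIT, the
// nthreads-var ICV and __kmp_teams_max_nth, then stores it in
// thr->th.th_teams_size.nth.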
static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                    int num_threads) {
  KMP_DEBUG_ASSERT(thr);
  // Remember the number of threads for inner parallel regions.
  if (!TCR_4(__kmp_init_middle))
    __kmp_middle_initialize(); // get internal globals calculated
  __kmp_assign_root_init_mask();
  KMP_DEBUG_ASSERT(__kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);

  if (num_threads == 0) {
    if (__kmp_teams_thread_limit > 0) {
      num_threads = __kmp_teams_thread_limit;
    } else {
      num_threads = __kmp_avail_proc / num_teams;
    }
    // Adjust num_threads without a warning since it is not a user setting:
    // num_threads = min(num_threads, nthreads-var, thread-limit-var).
    // No thread_limit clause specified - do not change thread-limit-var ICV.
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
      num_threads = thr->th.th_current_task->td_icvs.thread_limit;
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      num_threads = __kmp_teams_max_nth / num_teams;
    }
    if (num_threads == 0) {
      num_threads = 1;
    }
  } else {
    if (num_threads < 0) {
      __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
                __kmp_msg_null);
      num_threads = 1;
    }
    // This thread will be the primary thread of the league primary threads.
    // Store the new thread limit; the old limit is saved in the th_cg_roots
    // list.
    thr->th.th_current_task->td_icvs.thread_limit = num_threads;
    // num_threads = min(num_threads, nthreads-var)
    if (num_threads > __kmp_dflt_team_nth) {
      num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV
    }
    if (num_teams * num_threads > __kmp_teams_max_nth) {
      int new_threads = __kmp_teams_max_nth / num_teams;
      if (new_threads == 0) {
        new_threads = 1;
      }
      if (new_threads != num_threads) {
        if (!__kmp_reserve_warn) { // user asked for too many threads
          __kmp_reserve_warn = 1;
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
        }
      }
      num_threads = new_threads;
    }
  }
  thr->th.th_teams_size.nth = num_threads;
}
/* Sets the requested number of teams for the teams region and/or
   the number of threads for the next parallel region encountered. */
void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                          int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  if (num_teams < 0) {
    // The OpenMP specification requires requested values to be positive,
    // but the user can pass any value, so check it.
    __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
              __kmp_msg_null);
    num_teams = 1;
  }
  if (num_teams == 0) {
    if (__kmp_nteams > 0) {
      num_teams = __kmp_nteams;
    } else {
      num_teams = 1; // default number of teams is 1
    }
  }
  if (num_teams > __kmp_teams_max_nth) { // too many teams requested?
    if (!__kmp_reserve_warn) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    num_teams = __kmp_teams_max_nth;
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct).
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
/* OpenMP 5.1 variant: the num_teams clause may specify a lower and an upper
   bound on the number of teams. */
void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
                             int num_teams_ub, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
  KMP_DEBUG_ASSERT(num_threads >= 0);

  if (num_teams_lb > num_teams_ub) {
    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  }

  int num_teams = 1; // default number of teams is 1

  if (num_teams_lb == 0 && num_teams_ub > 0)
    num_teams_lb = num_teams_ub;

  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
    if (num_teams > __kmp_teams_max_nth) {
      if (!__kmp_reserve_warn) {
        __kmp_reserve_warn = 1;
        __kmp_msg(kmp_ms_warning,
                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
      }
      num_teams = __kmp_teams_max_nth;
    }
  } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
    num_teams = num_teams_ub;
  } else { // num_teams_lb <= num_teams <= num_teams_ub
    if (num_threads <= 0) {
      if (num_teams_ub > __kmp_teams_max_nth) {
        num_teams = num_teams_lb;
      } else {
        num_teams = num_teams_ub;
      }
    } else {
      num_teams = (num_threads > __kmp_teams_max_nth)
                      ? num_teams
                      : __kmp_teams_max_nth / num_threads;
      if (num_teams < num_teams_lb) {
        num_teams = num_teams_lb;
      } else if (num_teams > num_teams_ub) {
        num_teams = num_teams_ub;
      }
    }
  }
  // Set number of teams (number of threads in the outer "parallel" of the
  // teams construct).
  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;

  __kmp_push_thread_limit(thr, num_teams, num_threads);
}
// Set proc_bind for the next parallel region encountered by this thread.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
  kmp_info_t *thr = __kmp_threads[gtid];
  thr->th.th_set_proc_bind = proc_bind;
}
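/* Launch the worker threads of a newly formed team into the microtask:
   reset the per-team construct and dispatch-buffer state, then release the
   workers through the fork barrier. */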
void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

  team->t.t_construct = 0; /* no single directives seen yet */
  team->t.t_ordered.dt.t_value =
      0; /* thread zero enters the ordered section first */

  /* Reset the identifiers on the dispatch buffers */
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  if (team->t.t_max_nproc > 1) {
    int i;
    for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
      team->t.t_disp_buffer[i].buffer_index = i;
      team->t.t_disp_buffer[i].doacross_buf_idx = i;
    }
  } else {
    team->t.t_disp_buffer[0].buffer_index = 0;
    team->t.t_disp_buffer[0].doacross_buf_idx = 0;
  }

  KMP_ASSERT(this_thr->th.th_team == team);

  for (int f = 0; f < team->t.t_nproc; f++) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
  }

  /* release the worker threads so they may begin working */
  __kmp_fork_barrier(gtid, 0);
}
void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
  kmp_info_t *this_thr = __kmp_threads[gtid];

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
  KMP_ASSERT(KMP_MASTER_GTID(gtid));

  /* Join barrier after fork */
  if (__kmp_threads[gtid] &&
      __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
    __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
                 __kmp_threads[gtid]);
    __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
                 "team->t.t_nproc=%d\n",
                 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                 team->t.t_nproc);
    __kmp_print_structure();
  }
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);

  __kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
    int ds_tid = this_thr->th.th_info.ds.ds_tid;
    ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (KMP_MASTER_TID(ds_tid) &&
        (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
         ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
      codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;

    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, task_data, 0, ds_tid, ompt_task_implicit);
    }
  }
#endif

  KMP_ASSERT(this_thr->th.th_team == team);
}
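// When KMP_DYNAMIC_MODE=load_balance is in effect, the helpers below estimate
// how many threads the next parallel region should get, based on the current
// system load reported by __kmp_get_load_balance().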
#ifdef USE_LOAD_BALANCE
// Return the number of worker threads actively spinning in the hot team, if
// we are at the outermost level of parallelism. Otherwise, return 0.
static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
  int i;
  int retval;
  kmp_team_t *hot_team;

  if (root->r.r_active) {
    return 0;
  }
  hot_team = root->r.r_hot_team;
  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
    return hot_team->t.t_nproc - 1; // don't count the primary thread
  }
  // Skip the primary thread - it is accounted for elsewhere.
  retval = 0;
  for (i = 1; i < hot_team->t.t_nproc; i++) {
    if (hot_team->t.t_threads[i]->th.th_active) {
      retval++;
    }
  }
  return retval;
}

// Perform an automatic adjustment to the number of
// threads used by the next parallel region.
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
  int retval;
  int pool_active;
  int hot_team_active;
  int team_curr_active;
  int system_active;

  KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
                root, set_nproc));
  KMP_DEBUG_ASSERT(root);
  KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
                       ->th.th_current_task->td_icvs.dynamic == TRUE);
  KMP_DEBUG_ASSERT(set_nproc > 1);

  if (set_nproc == 1) {
    KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
    return 1;
  }

  // Threads in the pool, threads active in this root's hot team, and the
  // current thread are available to the new team but already contribute to
  // the system load, so account for them.
  pool_active = __kmp_thread_pool_active_nth;
  hot_team_active = __kmp_active_hot_team_nproc(root);
  team_curr_active = pool_active + hot_team_active + 1;

  system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
  KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
                "hot team active = %d\n",
                system_active, pool_active, hot_team_active));

  if (system_active < 0) {
    // Error reading the load info; fall back to the thread-limit algorithm
    // and keep using it from now on.
    __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
    KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");

    // Make this call behave like the thread-limit algorithm.
    retval = __kmp_avail_proc - __kmp_nth +
             (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (retval > set_nproc) {
      retval = set_nproc;
    }
    if (retval < KMP_MIN_NTH) {
      retval = KMP_MIN_NTH;
    }
    KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                  retval));
    return retval;
  }

  // The load balance algorithm is slightly delayed in detecting newly running
  // procs, so the system load should be at least team_curr_active.
  if (system_active < team_curr_active) {
    system_active = team_curr_active;
  }
  retval = __kmp_avail_proc - system_active + team_curr_active;
  if (retval > set_nproc) {
    retval = set_nproc;
  }
  if (retval < KMP_MIN_NTH) {
    retval = KMP_MIN_NTH;
  }
  KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
  return retval;
}

#endif /* USE_LOAD_BALANCE */
void __kmp_cleanup(void) {
  int f;

  KA_TRACE(10, ("__kmp_cleanup: enter\n"));

  if (TCR_4(__kmp_init_parallel)) {
#if KMP_HANDLE_SIGNALS
    __kmp_remove_signals();
#endif
    TCW_4(__kmp_init_parallel, FALSE);
  }

  if (TCR_4(__kmp_init_middle)) {
#if KMP_AFFINITY_SUPPORTED
    __kmp_affinity_uninitialize();
#endif /* KMP_AFFINITY_SUPPORTED */
    __kmp_cleanup_hierarchy();
    TCW_4(__kmp_init_middle, FALSE);
  }

  KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));

  if (__kmp_init_serial) {
    __kmp_runtime_destroy();
    __kmp_init_serial = FALSE;
  }

  __kmp_cleanup_threadprivate_caches();

  for (f = 0; f < __kmp_threads_capacity; f++) {
    if (__kmp_root[f] != NULL) {
      __kmp_free(__kmp_root[f]);
      __kmp_root[f] = NULL;
    }
  }
  __kmp_free(__kmp_threads);
  // __kmp_threads and __kmp_root were allocated as a single block, so there
  // is no separate free for __kmp_root.
  __kmp_threads = NULL;
  __kmp_threads_capacity = 0;

  // Free old __kmp_threads arrays if they exist.
  kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
  while (ptr) {
    kmp_old_threads_list_t *next = ptr->next;
    __kmp_free(ptr->threads);
    __kmp_free(ptr);
    ptr = next;
  }

#if KMP_USE_DYNAMIC_LOCK
  __kmp_cleanup_indirect_user_locks();
#else
  __kmp_cleanup_user_locks();
#endif
#if OMPD_SUPPORT
  if (ompd_env_block) {
    __kmp_free(ompd_env_block);
    ompd_env_block = NULL;
    ompd_env_block_size = 0;
  }
#endif

#if KMP_AFFINITY_SUPPORTED
  KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
  __kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */

#if KMP_USE_ADAPTIVE_LOCKS
#if KMP_DEBUG_ADAPTIVE_LOCKS
  __kmp_print_speculative_stats();
#endif
#endif
  KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
  __kmp_nested_nth.nth = NULL;
  __kmp_nested_nth.size = 0;
  __kmp_nested_nth.used = 0;
  KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
  __kmp_nested_proc_bind.bind_types = NULL;
  __kmp_nested_proc_bind.size = 0;
  __kmp_nested_proc_bind.used = 0;
  if (__kmp_affinity_format) {
    KMP_INTERNAL_FREE(__kmp_affinity_format);
    __kmp_affinity_format = NULL;
  }

  __kmp_i18n_catclose();

#if KMP_USE_HIER_SCHED
  __kmp_hier_scheds.deallocate();
#endif

#if KMP_STATS_ENABLED
#endif

  KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
int __kmp_ignore_mppbeg(void) {
  char *env;
  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;
  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }
  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get a valid gtid */
  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* do not change the library inside parallel */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
  }

  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 0; // teams region is serialized (1 team of 1 thread)
    return team->t.t_master_tid;
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1)
      return 1;
    return team->t.t_parent->t.t_nproc;
  }
  return 1;
}
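// Functions for OpenMP 5.0 affinity format support (OMP_AFFINITY_FORMAT,
// omp_capture_affinity, omp_display_affinity). Each %-field in a format
// string is described by an entry in __kmp_affinity_format_table below.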
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec, e.g., 'L' -> nesting_level
  const char *long_name; // from spec, e.g., "nesting_level"
  char field_format; // data type for snprintf ('d' for integer, 's' for string)
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
// Parse one %-field of an affinity format string, append the formatted value
// to field_buffer, and return the number of characters printed.
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse the width of the field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on the flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8-digit widths; this also prevents overflowing format
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short) and canonicalize it into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested value
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // If an implementation has no info for a field type, "undefined" is
    // printed (per the OpenMP spec).
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}
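// Expands a full affinity format string by repeatedly calling
// __kmp_aux_capture_affinity_field for each %-field and copying the other
// characters through verbatim. For example, a format such as
// "OMP: pid %P tid %n affinity %A" would expand %P, %n and %A and keep the
// literal text; if the format argument is NULL or empty, the
// affinity-format-var ICV (__kmp_affinity_format) is used instead.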
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or a zero-length string, use the affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a %-field
    if (*parse_ptr == '%') {
      // Put the expanded field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put the literal character in the buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}
// Displays the affinity string for the calling thread on kmp_out.
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime has been set explicitly */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
}
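// Selects which reduction implementation (critical section, atomic, or one of
// the tree-barrier based methods) __kmpc_reduce* should use, based on the
// flags the compiler generated (loc flags, reduce_data/reduce_func), the team
// size, and the architecture/OS.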
PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(loc);
  KMP_DEBUG_ASSERT(lck);

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS

#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD

    // basic tuning
    if (atomic_available) {
      if (num_vars <= 2) {
        retval = atomic_reduce_block;
      }
    }

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    }

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // A forced reduction method (KMP_FORCE_REDUCTION) takes precedence, except
  // for the serialized (team_size == 1) case.
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return retval;
}
// This function is for testing the set/get/determine reduce method machinery.
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}

// Soft pause sets up threads to ignore blocktime and just go to sleep.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
// Hard pause shuts down the runtime completely; resume happens on next use.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume clears __kmp_pause_status and wakes up all sleeping threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;
    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it up if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it was not sleeping
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}
// Called via __kmpc_pause_resource. Returns 0 on success, 1 on failure.
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused)
      return 1; // the runtime is not paused, so it cannot be resumed
    KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                     __kmp_pause_status == kmp_hard_paused);
    __kmp_pause_status = kmp_not_paused;
    return 0;
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_soft_pause();
    return 0;
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused)
      return 1; // already paused
    __kmp_hard_pause();
    return 0;
  }
  return 1; // invalid level
}
void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
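// The next two routines support resizing a team that uses the distributed
// (dist) barrier. Workers are parked, the barrier structure is resized, and
// threads are transitioned back in via the th_used_in_team state machine
// (roughly: 0 = not in team, 1 = in team, 2 = being removed, 3 =
// transitioning into the team).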
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust
  // the size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      continue;
    }
    // If a thread is still transitioning into the team, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in the team now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition to the unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  // Workers should see transition status 2 and move to 0; they may need to
  // be woken up first.
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  KMP_DEBUG_ASSERT(team);
  // Threads were parked during the resize; set th_used_in_team to 3 to tell
  // each thread to transition itself back into the team, and wake it up in
  // case it is sleeping.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }
  // Wait until every thread has moved into the team (th_used_in_team == 1).
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}
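// Globals and helper routines for the hidden helper team, which executes
// hidden helper tasks on its own set of threads.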
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // Explicit synchronization on all hidden helper threads, in case a regular
  // thread pushes a hidden helper task before every helper thread has been
  // woken up at least once after the team was created.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // Only the primary thread of the hidden helper team performs the release
  // and then waits for the signal to wake up the workers.
  if (__kmpc_master(nullptr, *gtid)) {
    // First, unset the initial state and release the initial thread.
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads.
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}
void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for the hidden helper team/threads.
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag back to FALSE.
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}
/* Nesting Mode:
   Set via KMP_NESTING_MODE, which takes an integer. A value of 0 (the
   default) disables nesting mode; 1 derives the nesting levels from the
   machine topology; N > 1 attempts to create up to N nesting levels. */
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}
// Set the number of threads for the top levels of nesting; must be called
// after the topology has been determined.
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use the topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--; // only use levels with more than one entity
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) { // don't create additional levels
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable estimate
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max levels was set, set nesting mode levels to the same value
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}