14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
21#include "kmp_settings.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
28#include "kmp_dispatch_hier.h"
32#include "ompt-specific.h"
35#include "ompd-specific.h"
// Optional profiling support, compiled only when OMP_PROFILING_SUPPORT is
// set. ProfileTraceFile is initialised to a null pointer.
// NOTE(review): no closing directive for this conditional is visible in this
// excerpt; lines appear to be missing -- confirm against the complete file.
38#if OMP_PROFILING_SUPPORT
39#include "llvm/Support/TimeProfiler.h"
40static char *ProfileTraceFile =
nullptr;
// KMP_USE_PRCTL is fixed at 0 in this file.
44#define KMP_USE_PRCTL 0
// Version strings: each is KMP_VERSION_PREFIX followed by a descriptive
// literal. The conditional below tests KMP_GOMP_COMPAT.
59#if defined(KMP_GOMP_COMPAT)
60char const __kmp_version_alt_comp[] =
61 KMP_VERSION_PREFIX
"alternative compiler support: yes";
64char const __kmp_version_omp_api[] =
65 KMP_VERSION_PREFIX
"API version: 5.0 (201611)";
68char const __kmp_version_lock[] =
69 KMP_VERSION_PREFIX
"lock type: run time selectable";
// Yields the smaller of x and y. The selected argument is evaluated twice,
// so arguments with side effects should be avoided.
72#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
// File-scope kmp_info_t object named for the monitor.
77kmp_info_t __kmp_monitor;
// Forward declarations for routines referenced in this file.
// NOTE(review): several prototypes below (__kmp_initialize_info,
// __kmp_initialize_team, __kmp_resize_dist_barrier) stop before their
// parameter lists are closed in this excerpt, and the preprocessor
// conditionals have no visible closing directive -- lines appear to be
// missing; confirm against the complete file.
82void __kmp_cleanup(
void);
84static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *,
int tid,
86static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
87 kmp_internal_control_t *new_icvs,
// Declared only when KMP_AFFINITY_SUPPORTED is set; update_master_only
// defaults to 0.
89#if KMP_AFFINITY_SUPPORTED
90static void __kmp_partition_places(kmp_team_t *team,
91 int update_master_only = 0);
93static void __kmp_do_serial_initialize(
void);
94void __kmp_fork_barrier(
int gtid,
int tid);
95void __kmp_join_barrier(
int gtid);
96void __kmp_setup_icv_copy(kmp_team_t *team,
int new_nproc,
97 kmp_internal_control_t *new_icvs,
ident_t *loc);
// Declared only when USE_LOAD_BALANCE is defined.
99#ifdef USE_LOAD_BALANCE
100static int __kmp_load_balance_nproc(kmp_root_t *root,
int set_nproc);
103static int __kmp_expand_threads(
int nNeed);
105static int __kmp_unregister_root_other_thread(
int gtid);
107static void __kmp_reap_thread(kmp_info_t *thread,
int is_root);
// Global pointer into the thread pool; starts out NULL.
108kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
110void __kmp_resize_dist_barrier(kmp_team_t *team,
int old_nthreads,
112void __kmp_add_threads_to_team(kmp_team_t *team,
int new_nthreads);
// Determines the global thread id (gtid) of the calling thread.
// The lookup strategy depends on __kmp_gtid_mode: mode >= 3 takes the
// path traced as "TDATA", mode >= 2 reads the id from keyed TLS via
// __kmp_gtid_get_specific(), and otherwise an internal algorithm compares
// the address of a local variable against the recorded stack ranges of
// the registered threads.
// NOTE(review): this excerpt omits lines (local declarations, several
// return statements, closing braces); confirm details against the
// complete file.
117int __kmp_get_global_thread_id() {
119 kmp_info_t **other_threads;
127 (
"*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
128 __kmp_nth, __kmp_all_nth));
// Checked before any lookup; the statement controlled by this test is not
// visible in this excerpt.
135 if (!TCR_4(__kmp_init_gtid))
139 if (TCR_4(__kmp_gtid_mode) >= 3) {
140 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id: using TDATA\n"));
144 if (TCR_4(__kmp_gtid_mode) >= 2) {
145 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id: using keyed TLS\n"));
146 return __kmp_gtid_get_specific();
148 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id: using internal alg.\n"));
// The address of a local serves as a probe into the current stack.
150 stack_addr = (
char *)&stack_data;
151 other_threads = __kmp_threads;
// Scan every slot of the thread table and test whether the probe address
// lies within that thread's recorded stack (base minus size .. base).
164 for (i = 0; i < __kmp_threads_capacity; i++) {
166 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
170 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
171 stack_base = (
char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
175 if (stack_addr <= stack_base) {
176 size_t stack_diff = stack_base - stack_addr;
178 if (stack_diff <= stack_size) {
// In debug builds, cross-check the match against the TLS value.
181 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
// No stack range matched: fall back to the id stored in TLS.
189 (
"*** __kmp_get_global_thread_id: internal alg. failed to find "
190 "thread, using TLS\n"));
191 i = __kmp_gtid_get_specific();
// If the stack of thread i is not flagged as growable, report a fatal
// StackOverflow error.
201 if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
202 KMP_FATAL(StackOverflow, i);
// Refine the recorded stack bounds so they include the probe address.
205 stack_base = (
char *)other_threads[i]->th.th_info.ds.ds_stackbase;
206 if (stack_addr > stack_base) {
207 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
208 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
209 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
212 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
213 stack_base - stack_addr);
// When storage-map printing is enabled, report the refined stack range.
217 if (__kmp_storage_map) {
218 char *stack_end = (
char *)other_threads[i]->th.th_info.ds.ds_stackbase;
219 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
220 __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
221 other_threads[i]->th.th_info.ds.ds_stacksize,
222 "th_%d stack (refinement)", i);
// Like __kmp_get_global_thread_id(), but when the lookup yields
// KMP_GTID_DNE the caller is treated as a new root thread and gets a gtid
// registered for it.
// NOTE(review): this excerpt omits lines (the declaration of gtid, else
// branches, the return statement, closing braces); confirm against the
// complete file.
227int __kmp_get_global_thread_id_reg() {
230 if (!__kmp_init_serial) {
234 if (TCR_4(__kmp_gtid_mode) >= 3) {
235 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id_reg: using TDATA\n"));
239 if (TCR_4(__kmp_gtid_mode) >= 2) {
240 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
241 gtid = __kmp_gtid_get_specific();
244 (
"*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
245 gtid = __kmp_get_global_thread_id();
// Unknown thread: under __kmp_initz_lock, either run serial initialization
// and re-read the gtid from TLS, or register a new root.
249 if (gtid == KMP_GTID_DNE) {
251 (
"__kmp_get_global_thread_id_reg: Encountered new root thread. "
252 "Registering a new gtid.\n"));
253 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
254 if (!__kmp_init_serial) {
255 __kmp_do_serial_initialize();
256 gtid = __kmp_gtid_get_specific();
258 gtid = __kmp_register_root(FALSE);
260 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
264 KMP_DEBUG_ASSERT(gtid >= 0);
// Checks whether the stack of thread th overlaps the stack of any other
// registered thread, and optionally prints the stack range.
// th: thread descriptor whose ds_stackbase / ds_stacksize are examined.
// NOTE(review): this excerpt omits lines (declarations of f and gtid,
// else branches, closing braces); confirm against the complete file.
270void __kmp_check_stack_overlap(kmp_info_t *th) {
272 char *stack_beg = NULL;
273 char *stack_end = NULL;
276 KA_TRACE(10, (
"__kmp_check_stack_overlap: called\n"));
// With storage-map printing enabled, report this thread's stack range; the
// monitor gtid is labelled "mon" instead of a number, and the suffix is
// "initial" or "actual" depending on ds_stackgrow.
277 if (__kmp_storage_map) {
278 stack_end = (
char *)th->th.th_info.ds.ds_stackbase;
279 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
281 gtid = __kmp_gtid_from_thread(th);
283 if (gtid == KMP_GTID_MONITOR) {
284 __kmp_print_storage_map_gtid(
285 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
286 "th_%s stack (%s)",
"mon",
287 (th->th.th_info.ds.ds_stackgrow) ?
"initial" :
"actual");
289 __kmp_print_storage_map_gtid(
290 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
291 "th_%d stack (%s)", gtid,
292 (th->th.th_info.ds.ds_stackgrow) ?
"initial" :
"actual");
// The pairwise overlap scan runs only when __kmp_env_checks is TRUE and
// KMP_UBER_GTID(gtid) is false.
298 gtid = __kmp_gtid_from_thread(th);
299 if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
301 (
"__kmp_check_stack_overlap: performing extensive checking\n"));
// Compute the range here if the storage-map branch did not already.
302 if (stack_beg == NULL) {
303 stack_end = (
char *)th->th.th_info.ds.ds_stackbase;
304 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
307 for (f = 0; f < __kmp_threads_capacity; f++) {
308 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
310 if (f_th && f_th != th) {
311 char *other_stack_end =
312 (
char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
313 char *other_stack_beg =
314 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
// Overlap is detected when either end of this stack lies strictly inside
// the other thread's range.
315 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
316 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
319 if (__kmp_storage_map)
320 __kmp_print_storage_map_gtid(
321 -1, other_stack_beg, other_stack_end,
322 (
size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
323 "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));
// Overlap is reported as a fatal StackOverlap error with a hint to change
// the stack limit (remaining arguments are not visible in this excerpt).
325 __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
331 KA_TRACE(10, (
"__kmp_check_stack_overlap: returning\n"));
// Declares a function-local static flag, done, initialised to FALSE.
// NOTE(review): the rest of the body (presumably a loop on that flag, going
// by the function name) is not visible in this excerpt -- confirm against
// the complete file.
336void __kmp_infinite_loop(
void) {
337 static int done = FALSE;
// Size in bytes of the on-stack message buffers used below.
344#define MAX_MESSAGE 512
// Prints one storage-map line to kmp_err describing the address range
// p1..p2 of the given size.
// gtid:   global thread id used in the optional placement report.
// p1, p2: start and end addresses of the region.
// size:   region size, printed as unsigned long.
// format: printf-style label; it is spliced into the line template and
//         then expanded with the caller's variadic arguments.
// NOTE(review): this excerpt omits lines (va_list declaration, va_end,
// else branches, closing braces and directives); confirm against the
// complete file.
346void __kmp_print_storage_map_gtid(
int gtid,
void *p1,
void *p2,
size_t size,
347 char const *format, ...) {
348 char buffer[MAX_MESSAGE];
351 va_start(ap, format);
// First pass builds the final format string (addresses and size already
// expanded, caller's format embedded through %s); the second pass, under
// __kmp_stdio_lock, expands the caller's arguments.
352 KMP_SNPRINTF(buffer,
sizeof(buffer),
"OMP storage map: %p %p%8lu %s\n", p1,
353 p2, (
unsigned long)size, format);
354 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
355 __kmp_vprintf(kmp_err, buffer, ap);
// Optional report of which memory node backs each page of the region.
356#if KMP_PRINT_DATA_PLACEMENT
359 if (p1 <= p2 && (
char *)p2 - (
char *)p1 == size) {
360 if (__kmp_storage_map_verbose) {
361 node = __kmp_get_host_node(p1);
363 __kmp_storage_map_verbose = FALSE;
367 int localProc = __kmp_get_cpu_from_gtid(gtid);
369 const int page_size = KMP_GET_PAGE_SIZE();
// Round p1 down to its page start and p2 down to the start of the page
// holding the last byte (assumes page_size is a power of two -- TODO
// confirm).
371 p1 = (
void *)((
size_t)p1 & ~((size_t)page_size - 1));
372 p2 = (
void *)(((
size_t)p2 - 1) & ~((
size_t)page_size - 1));
374 __kmp_printf_no_lock(
" GTID %d localNode %d\n", gtid,
377 __kmp_printf_no_lock(
" GTID %d\n", gtid);
// NOTE(review): assigning through a cast expression is not valid standard
// C or C++; this only compiles if KMP_PRINT_DATA_PLACEMENT is enabled with
// a compiler extension -- confirm.
386 (
char *)p1 += page_size;
387 }
while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
388 __kmp_printf_no_lock(
" %p-%p memNode %d\n", last, (
char *)p1 - 1,
392 __kmp_printf_no_lock(
" %p-%p memNode %d\n", p1,
393 (
char *)p1 + (page_size - 1),
394 __kmp_get_host_node(p1));
396 __kmp_printf_no_lock(
" %p-%p memNode %d\n", p2,
397 (
char *)p2 + (page_size - 1),
398 __kmp_get_host_node(p2));
404 __kmp_printf_no_lock(
" %s\n", KMP_I18N_STR(StorageMapWarning));
407 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
// Prints a warning to kmp_err in the form "OMP warning: <message>".
// format: printf-style message, expanded with the variadic arguments.
// NOTE(review): this excerpt omits lines (va_list declaration, the body of
// the warnings-off test, va_end, closing braces); confirm against the
// complete file.
410void __kmp_warn(
char const *format, ...) {
411 char buffer[MAX_MESSAGE];
// Warnings can be disabled globally; the statement controlled by this test
// is not visible in this excerpt.
414 if (__kmp_generate_warnings == kmp_warnings_off) {
418 va_start(ap, format);
// Wrap the caller's format in the warning template, then expand the
// caller's arguments while holding __kmp_stdio_lock.
420 KMP_SNPRINTF(buffer,
sizeof(buffer),
"OMP warning: %s\n", format);
421 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
422 __kmp_vprintf(kmp_err, buffer, ap);
423 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
// Abort path for the whole process. While holding __kmp_exit_lock it dumps
// the debug buffer when __kmp_debug_buf is set; the visible lines also set
// the global abort flag to SIGABRT under a KMP_OS_WINDOWS test, call
// __kmp_unregister_library(), and call __kmp_infinite_loop() before
// releasing the lock.
// NOTE(review): the branch structure around the Windows test is incomplete
// in this excerpt (lines are missing), so which calls belong to which
// branch cannot be determined here -- confirm against the complete file.
428void __kmp_abort_process() {
430 __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
432 if (__kmp_debug_buf) {
433 __kmp_dump_debug_buffer();
436 if (KMP_OS_WINDOWS) {
439 __kmp_global.g.g_abort = SIGABRT;
453 __kmp_unregister_library();
457 __kmp_infinite_loop();
458 __kmp_release_bootstrap_lock(&__kmp_exit_lock);
// Abort path for a single thread: the only visible statement hands control
// to __kmp_infinite_loop().
// NOTE(review): the closing brace is not visible in this excerpt.
462void __kmp_abort_thread(
void) {
465 __kmp_infinite_loop();
// Prints storage-map lines for a thread descriptor: the whole kmp_info_t,
// then its th_info, th_local and th_bar members, including the individual
// plain, forkjoin and (when KMP_FAST_REDUCTION_BARRIER is set) reduction
// barrier entries.
// thr:  thread descriptor to describe.
// gtid: global thread id used in the printed labels.
// NOTE(review): several calls are cut off before their final argument in
// this excerpt, and the closing directive and brace are missing; confirm
// against the complete file.
471static void __kmp_print_thread_storage_map(kmp_info_t *thr,
int gtid) {
472 __kmp_print_storage_map_gtid(gtid, thr, thr + 1,
sizeof(kmp_info_t),
"th_%d",
475 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
476 sizeof(kmp_desc_t),
"th_%d.th_info", gtid);
478 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
479 sizeof(kmp_local_t),
"th_%d.th_local", gtid);
// Whole barrier array first, then one line per barrier kind.
481 __kmp_print_storage_map_gtid(
482 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
483 sizeof(kmp_balign_t) * bs_last_barrier,
"th_%d.th_bar", gtid);
485 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
486 &thr->th.th_bar[bs_plain_barrier + 1],
487 sizeof(kmp_balign_t),
"th_%d.th_bar[plain]",
490 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
491 &thr->th.th_bar[bs_forkjoin_barrier + 1],
492 sizeof(kmp_balign_t),
"th_%d.th_bar[forkjoin]",
495#if KMP_FAST_REDUCTION_BARRIER
496 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
497 &thr->th.th_bar[bs_reduction_barrier + 1],
498 sizeof(kmp_balign_t),
"th_%d.th_bar[reduction]",
// Prints storage-map lines for a team structure: the whole kmp_team_t,
// its barrier array and individual barrier entries, and the t_dispatch,
// t_threads and t_disp_buffer arrays. All lines are printed with gtid -1.
// header:  label prefix used in every printed line.
// team:    team structure to describe.
// team_id: numeric id appended to the label prefix.
// num_thr: element count used for the t_dispatch and t_threads ranges.
// NOTE(review): some calls are cut off before their final arguments in
// this excerpt, and the closing directive and brace are missing; confirm
// against the complete file.
506static void __kmp_print_team_storage_map(
const char *header, kmp_team_t *team,
507 int team_id,
int num_thr) {
// Teams with t_max_nproc > 1 use __kmp_dispatch_num_buffers dispatch
// buffers; otherwise 2 is used.
508 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
509 __kmp_print_storage_map_gtid(-1, team, team + 1,
sizeof(kmp_team_t),
"%s_%d",
512 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
513 &team->t.t_bar[bs_last_barrier],
514 sizeof(kmp_balign_team_t) * bs_last_barrier,
515 "%s_%d.t_bar", header, team_id);
517 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
518 &team->t.t_bar[bs_plain_barrier + 1],
519 sizeof(kmp_balign_team_t),
"%s_%d.t_bar[plain]",
522 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
523 &team->t.t_bar[bs_forkjoin_barrier + 1],
524 sizeof(kmp_balign_team_t),
525 "%s_%d.t_bar[forkjoin]", header, team_id);
527#if KMP_FAST_REDUCTION_BARRIER
528 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
529 &team->t.t_bar[bs_reduction_barrier + 1],
530 sizeof(kmp_balign_team_t),
531 "%s_%d.t_bar[reduction]", header, team_id);
534 __kmp_print_storage_map_gtid(
535 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
536 sizeof(kmp_disp_t) * num_thr,
"%s_%d.t_dispatch", header, team_id);
538 __kmp_print_storage_map_gtid(
539 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
540 sizeof(kmp_info_t *) * num_thr,
"%s_%d.t_threads", header, team_id);
542 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
543 &team->t.t_disp_buffer[num_disp_buff],
544 sizeof(dispatch_shared_info_t) * num_disp_buff,
545 "%s_%d.t_disp_buffer", header, team_id);
// Allocator start-up: calls __kmp_init_memkind() and then
// __kmp_init_target_mem().
// NOTE(review): the closing brace is not visible in this excerpt.
548static void __kmp_init_allocator() {
549 __kmp_init_memkind();
550 __kmp_init_target_mem();
// Allocator shutdown: calls __kmp_fini_memkind().
552static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
// Compiled only when ENABLE_LIBOMPTARGET is set: calls
// __kmp_init_target_task().
// NOTE(review): the closing brace and closing directive are not visible in
// this excerpt.
556#if ENABLE_LIBOMPTARGET
557static void __kmp_init_omptarget() {
558 __kmp_init_target_task();
// Windows DLL entry point. Each notification is traced; on process detach
// with lpReserved == NULL the library is shut down through
// __kmp_internal_end_library(), and on thread detach the calling thread is
// shut down through __kmp_internal_end_thread(). Both receive the gtid read
// from TLS.
// NOTE(review): the switch header, break/return statements and closing
// braces are not visible in this excerpt; confirm against the complete
// file.
567BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
572 case DLL_PROCESS_ATTACH:
573 KA_TRACE(10, (
"DllMain: PROCESS_ATTACH\n"));
577 case DLL_PROCESS_DETACH:
578 KA_TRACE(10, (
"DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
591 if (lpReserved == NULL)
592 __kmp_internal_end_library(__kmp_gtid_get_specific());
596 case DLL_THREAD_ATTACH:
597 KA_TRACE(10, (
"DllMain: THREAD_ATTACH\n"));
603 case DLL_THREAD_DETACH:
604 KA_TRACE(10, (
"DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
606 __kmp_internal_end_thread(__kmp_gtid_get_specific());
// Entry hook for an ordered section inside a parallel region.
// gtid_ref: pointer to the global thread id of the caller.
// cid_ref:  not referenced in the visible lines.
// loc_ref:  source location passed to the consistency checker.
// With consistency checking on and the root active, an
// ct_ordered_in_parallel sync record is pushed. When
// BUILD_PARALLEL_ORDERED is defined and the team is not serialized, the
// thread waits on the team's ordered counter using its own tid.
// NOTE(review): the KMP_WAIT call, the directive branches and the closing
// braces are cut off in this excerpt; confirm against the complete file.
617void __kmp_parallel_deo(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref) {
618 int gtid = *gtid_ref;
619#ifdef BUILD_PARALLEL_ORDERED
620 kmp_team_t *team = __kmp_team_from_gtid(gtid);
623 if (__kmp_env_consistency_check) {
624 if (__kmp_threads[gtid]->th.th_root->r.r_active)
// The dynamic-lock build passes one extra trailing argument.
625#if KMP_USE_DYNAMIC_LOCK
626 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
628 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
631#ifdef BUILD_PARALLEL_ORDERED
632 if (!team->t.t_serialized) {
634 KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
// Exit hook for an ordered section inside a parallel region.
// gtid_ref: pointer to the global thread id of the caller.
// cid_ref:  not referenced in the visible lines.
// loc_ref:  source location passed to the consistency checker.
// With consistency checking on and the root active, the
// ct_ordered_in_parallel sync record is popped. When
// BUILD_PARALLEL_ORDERED is defined and the team is not serialized, the
// team's ordered counter is set to the next tid, wrapping at t_nproc.
// NOTE(review): closing directives and braces are not visible in this
// excerpt; confirm against the complete file.
642void __kmp_parallel_dxo(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref) {
643 int gtid = *gtid_ref;
644#ifdef BUILD_PARALLEL_ORDERED
645 int tid = __kmp_tid_from_gtid(gtid);
646 kmp_team_t *team = __kmp_team_from_gtid(gtid);
649 if (__kmp_env_consistency_check) {
650 if (__kmp_threads[gtid]->th.th_root->r.r_active)
651 __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
653#ifdef BUILD_PARALLEL_ORDERED
654 if (!team->t.t_serialized) {
659 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
// Decides whether the calling thread executes a single construct.
// gtid:    global thread id of the caller.
// id_ref:  source location, stored in th_ident and used for checks.
// push_ws: when non-zero and this thread won, a ct_psingle workshare
//          record is pushed during consistency checking.
// NOTE(review): this excerpt omits lines (declarations of status, th and
// team, else branches, the return statement, closing braces); the return
// value is presumably status -- confirm against the complete file.
669int __kmp_enter_single(
int gtid,
ident_t *id_ref,
int push_ws) {
// Make sure the parallel runtime is initialised and not soft-paused.
674 if (!TCR_4(__kmp_init_parallel))
675 __kmp_parallel_initialize();
676 __kmp_resume_if_soft_paused();
678 th = __kmp_threads[gtid];
679 team = th->th.th_team;
682 th->th.th_ident = id_ref;
684 if (team->t.t_serialized) {
// Each thread advances its private construct counter; the thread whose
// compare-and-store moves the team counter from the old value to the new
// one obtains a non-zero status.
687 kmp_int32 old_this = th->th.th_local.this_construct;
689 ++th->th.th_local.this_construct;
693 if (team->t.t_construct == old_this) {
694 status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
695 th->th.th_local.this_construct);
// ITT metadata is emitted only in fork-join frames mode 3, for the primary
// thread, outside teams constructs, at active level 1.
698 if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
699 KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
700 team->t.t_active_level == 1) {
702 __kmp_itt_metadata_single(id_ref);
707 if (__kmp_env_consistency_check) {
708 if (status && push_ws) {
709 __kmp_push_workshare(gtid, ct_psingle, id_ref);
711 __kmp_check_workshare(gtid, ct_psingle, id_ref);
716 __kmp_itt_single_start(gtid);
// Leaves a single construct: notifies ITT and, with consistency checking
// enabled, pops the ct_psingle workshare record.
// gtid: global thread id of the caller.
// NOTE(review): the closing brace is not visible in this excerpt.
722void __kmp_exit_single(
int gtid) {
724 __kmp_itt_single_end(gtid);
726 if (__kmp_env_consistency_check)
727 __kmp_pop_workshare(gtid, ct_psingle, NULL);
// Works out how many threads a new team may actually use, starting from
// the requested count set_nthreads and reducing it in stages: dynamic
// adjustment (load balance, thread limit or random mode), the limit
// derived from __kmp_max_nth, the contention-group thread limit, and the
// capacity of the global thread table (which is expanded on demand).
// root:         root descriptor; r_active and the hot team size feed the
//               thread accounting.
// parent_team:  team of the forking thread.
// master_tid:   index of the forking thread within parent_team.
// set_nthreads: requested number of threads.
// NOTE(review): this excerpt omits many lines (the rest of the parameter
// list, local declarations, right-hand sides of several comparisons,
// returns, closing braces); the return value is presumably new_nthreads --
// confirm against the complete file.
736static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
737 int master_tid,
int set_nthreads,
741 KMP_DEBUG_ASSERT(__kmp_init_serial);
742 KMP_DEBUG_ASSERT(root && parent_team);
743 kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
// Stage 1: dynamic adjustment, skipped when the dynamic setting read by
// get__dynamic_2 is off.
747 new_nthreads = set_nthreads;
748 if (!get__dynamic_2(parent_team, master_tid)) {
751#ifdef USE_LOAD_BALANCE
752 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
753 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
754 if (new_nthreads == 1) {
755 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d load balance reduced "
756 "reservation to 1 thread\n",
760 if (new_nthreads < set_nthreads) {
761 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d load balance reduced "
762 "reservation to %d threads\n",
763 master_tid, new_nthreads));
// Thread-limit mode: available processors minus threads already running,
// plus the threads this root already owns (1 if the root is active,
// otherwise its hot team size).
767 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
768 new_nthreads = __kmp_avail_proc - __kmp_nth +
769 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
770 if (new_nthreads <= 1) {
771 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d thread limit reduced "
772 "reservation to 1 thread\n",
776 if (new_nthreads < set_nthreads) {
777 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d thread limit reduced "
778 "reservation to %d threads\n",
779 master_tid, new_nthreads));
781 new_nthreads = set_nthreads;
783 }
else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
// Random mode applies only to requests above 2 threads and picks a value
// in the range 1..set_nthreads.
784 if (set_nthreads > 2) {
785 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
786 new_nthreads = (new_nthreads % set_nthreads) + 1;
787 if (new_nthreads == 1) {
788 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d dynamic random reduced "
789 "reservation to 1 thread\n",
793 if (new_nthreads < set_nthreads) {
794 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d dynamic random reduced "
795 "reservation to %d threads\n",
796 master_tid, new_nthreads));
// Stage 2: limit derived from __kmp_max_nth (traced as
// KMP_DEVICE_THREAD_LIMIT). The right-hand side of the comparison is not
// visible in this excerpt.
804 if (__kmp_nth + new_nthreads -
805 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
807 int tl_nthreads = __kmp_max_nth - __kmp_nth +
808 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
809 if (tl_nthreads <= 0) {
// Warn at most once (guarded by __kmp_reserve_warn), and only when the
// dynamic setting is off.
814 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
815 __kmp_reserve_warn = 1;
816 __kmp_msg(kmp_ms_warning,
817 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
818 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
820 if (tl_nthreads == 1) {
821 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
822 "reduced reservation to 1 thread\n",
826 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
827 "reservation to %d threads\n",
828 master_tid, tl_nthreads));
829 new_nthreads = tl_nthreads;
// Stage 3: limit of the contention group of the forking thread (traced as
// OMP_THREAD_LIMIT).
833 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
834 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
835 if (cg_nthreads + new_nthreads -
836 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
838 int tl_nthreads = max_cg_threads - cg_nthreads +
839 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
840 if (tl_nthreads <= 0) {
845 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
846 __kmp_reserve_warn = 1;
847 __kmp_msg(kmp_ms_warning,
848 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
849 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
851 if (tl_nthreads == 1) {
852 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
853 "reduced reservation to 1 thread\n",
857 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
858 "reservation to %d threads\n",
859 master_tid, tl_nthreads));
860 new_nthreads = tl_nthreads;
// Stage 4: capacity of the global thread table. Slots for hidden helper
// threads are subtracted when helpers are enabled but not yet initialised.
866 capacity = __kmp_threads_capacity;
867 if (TCR_PTR(__kmp_threads[0]) == NULL) {
873 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
874 capacity -= __kmp_hidden_helper_threads_num;
876 if (__kmp_nth + new_nthreads -
877 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
880 int slotsRequired = __kmp_nth + new_nthreads -
881 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
// Try to grow the table; if fewer slots were added than needed, shrink the
// reservation by the shortfall.
883 int slotsAdded = __kmp_expand_threads(slotsRequired);
884 if (slotsAdded < slotsRequired) {
886 new_nthreads -= (slotsRequired - slotsAdded);
887 KMP_ASSERT(new_nthreads >= 1);
890 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
891 __kmp_reserve_warn = 1;
892 if (__kmp_tp_cached) {
893 __kmp_msg(kmp_ms_warning,
894 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
895 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
896 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
898 __kmp_msg(kmp_ms_warning,
899 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
900 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
// Final trace of the outcome.
907 if (new_nthreads == 1) {
909 (
"__kmp_reserve_threads: T#%d serializing team after reclaiming "
910 "dead roots and rechecking; requested %d threads\n",
911 __kmp_get_gtid(), set_nthreads));
913 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d allocating %d threads; requested"
915 __kmp_get_gtid(), new_nthreads, set_nthreads));
// Populates a team for a fork: installs the forking thread as team member
// 0, obtains a worker for every other slot, aligns the barrier state of
// the workers with the team, and sets up affinity and distributed-barrier
// bookkeeping.
// root:               root descriptor passed to __kmp_allocate_thread.
// team:               team being populated; t_nproc gives its size.
// master_th:          descriptor of the forking thread.
// master_gtid:        its global thread id (asserted to be the caller).
// fork_teams_workers: when non-zero, place partitioning is skipped.
// NOTE(review): this excerpt omits lines (local declarations, else
// branches, closing directives and braces), so the exact nesting of the
// statements below cannot be determined here; confirm against the
// complete file.
924static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
925 kmp_info_t *master_th,
int master_gtid,
926 int fork_teams_workers) {
930 KA_TRACE(10, (
"__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
931 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
// Bind the forking thread to the team as tid 0.
935 master_th->th.th_info.ds.ds_tid = 0;
936 master_th->th.th_team = team;
937 master_th->th.th_team_nproc = team->t.t_nproc;
938 master_th->th.th_team_master = master_th;
939 master_th->th.th_team_serialized = FALSE;
940 master_th->th.th_dispatch = &team->t.t_dispatch[0];
// With nested hot teams, record this team in the per-level hot team table
// of the forking thread when the level is below
// __kmp_hot_teams_max_level.
943#if KMP_NESTED_HOT_TEAMS
945 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
948 int level = team->t.t_active_level - 1;
949 if (master_th->th.th_teams_microtask) {
950 if (master_th->th.th_teams_size.nteams > 1) {
954 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
955 master_th->th.th_teams_level == team->t.t_level) {
960 if (level < __kmp_hot_teams_max_level) {
961 if (hot_teams[level].hot_team) {
963 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
967 hot_teams[level].hot_team = team;
968 hot_teams[level].hot_team_nth = team->t.t_nproc;
975 use_hot_team = team == root->r.r_hot_team;
980 team->t.t_threads[0] = master_th;
981 __kmp_initialize_info(master_th, team, 0, master_gtid);
// Obtain a worker for each remaining slot and copy the teams-construct
// state from the forking thread.
984 for (i = 1; i < team->t.t_nproc; i++) {
987 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
988 team->t.t_threads[i] = thr;
989 KMP_DEBUG_ASSERT(thr);
990 KMP_DEBUG_ASSERT(thr->th.th_team == team);
992 KA_TRACE(20, (
"__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
993 "T#%d(%d:%d) join =%llu, plain=%llu\n",
994 __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
995 __kmp_gtid_from_tid(i, team), team->t.t_id, i,
996 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
997 team->t.t_bar[bs_plain_barrier].b_arrived));
998 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
999 thr->th.th_teams_level = master_th->th.th_teams_level;
1000 thr->th.th_teams_size = master_th->th.th_teams_size;
// Copy the team's arrival counters into the worker's barrier records.
1003 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1004 for (b = 0; b < bs_last_barrier; ++b) {
1005 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1006 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
1008 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1014#if KMP_AFFINITY_SUPPORTED
1018 if (!fork_teams_workers) {
1019 __kmp_partition_places(team);
// The distributed barrier needs to be told the team size when that
// pattern is selected for the fork-join barrier.
1023 if (team->t.t_nproc > 1 &&
1024 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1025 team->t.b->update_num_threads(team->t.t_nproc);
1026 __kmp_add_threads_to_team(team, team->t.t_nproc);
// Flag the team for affinity display when any member last ran with a
// different team size or nesting level.
1030 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1031 for (i = 0; i < team->t.t_nproc; i++) {
1032 kmp_info_t *thr = team->t.t_threads[i];
1033 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1034 thr->th.th_prev_level != team->t.t_level) {
1035 team->t.t_display_affinity = 1;
// x86 / x86_64 only. When __kmp_inherit_fp_control is set, captures the
// calling thread's x87 control word and MXCSR (masked with
// KMP_X86_MXCSR_MASK) into the team and marks them as saved; the visible
// lines also clear the saved flag.
// team: team structure receiving the floating-point control state.
// NOTE(review): the declaration of mxcsr, the else branch and closing
// braces are not visible in this excerpt, so the saved flag is presumably
// cleared in the branch where inheritance is disabled -- confirm against
// the complete file.
1044#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1048inline static void propagateFPControl(kmp_team_t *team) {
1049 if (__kmp_inherit_fp_control) {
1050 kmp_int16 x87_fpu_control_word;
1054 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1055 __kmp_store_mxcsr(&mxcsr);
1056 mxcsr &= KMP_X86_MXCSR_MASK;
1067 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1068 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
1071 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
1075 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
// Applies the floating-point control state saved in the team to the
// calling thread. Runs only when __kmp_inherit_fp_control is set and the
// team has saved state; each register is reloaded only if its current
// value differs from the saved one.
// team: team structure holding the saved x87 control word and MXCSR.
// NOTE(review): the declaration of mxcsr and the closing braces are not
// visible in this excerpt.
1081inline static void updateHWFPControl(kmp_team_t *team) {
1082 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1085 kmp_int16 x87_fpu_control_word;
1087 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1088 __kmp_store_mxcsr(&mxcsr);
1089 mxcsr &= KMP_X86_MXCSR_MASK;
// The x87 status word is cleared before the control word is loaded.
1091 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1092 __kmp_clear_x87_fpu_status_word();
1093 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1096 if (team->t.t_mxcsr != mxcsr) {
1097 __kmp_load_mxcsr(&team->t.t_mxcsr);
// No-op macro versions of the two floating-point control helpers.
// NOTE(review): presumably the alternative branch of the x86 conditional
// that guards the function versions; the directives selecting this branch
// are not visible in this excerpt -- confirm.
1102#define propagateFPControl(x) ((void)0)
1103#define updateHWFPControl(x) ((void)0)
// Forward declaration of the helper that takes an argument count and a
// team.
// NOTE(review): the prototype is cut off after the second parameter in
// this excerpt; a later call in this file passes a third argument.
1106static void __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
// Enters a serialized parallel region: the calling thread becomes the only
// member of its serial team, with ICVs, dispatch buffers, consistency
// checking state and OMPT notifications set up accordingly.
// loc:        source location, stored in the serial team.
// global_tid: global thread id of the calling thread.
// NOTE(review): this excerpt omits many lines (else branches, directive
// guards such as the OMPT conditionals, some declarations such as
// team_size, closing braces); the first-entry versus nested-entry split
// described below is inferred from the visible tests and asserts --
// confirm against the complete file.
1111void __kmp_serialized_parallel(
ident_t *loc, kmp_int32 global_tid) {
1112 kmp_info_t *this_thr;
1113 kmp_team_t *serial_team;
1115 KC_TRACE(10, (
"__kmpc_serialized_parallel: called by T#%d\n", global_tid));
// Make sure the parallel runtime is initialised and not soft-paused.
1122 if (!TCR_4(__kmp_init_parallel))
1123 __kmp_parallel_initialize();
1124 __kmp_resume_if_soft_paused();
1126 this_thr = __kmp_threads[global_tid];
1127 serial_team = this_thr->th.th_serial_team;
1130 KMP_DEBUG_ASSERT(serial_team);
// Unless tasking runs in immediate-exec mode, the current task team
// pointer is cleared for the duration of the serialized region.
1133 if (__kmp_tasking_mode != tskm_immediate_exec) {
1135 this_thr->th.th_task_team ==
1136 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1137 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1139 KA_TRACE(20, (
"__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1140 "team %p, new task_team = NULL\n",
1141 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1142 this_thr->th.th_task_team = NULL;
// Resolve proc_bind: a proc_bind ICV of false wins; otherwise a pending
// default request is replaced by the ICV value. The pending request and
// the pending thread count are then reset.
1145 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1146 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1147 proc_bind = proc_bind_false;
1148 }
else if (proc_bind == proc_bind_default) {
1151 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
1154 this_thr->th.th_set_proc_bind = proc_bind_default;
1157 this_thr->th.th_set_nproc = 0;
// OMPT: report parallel-begin unless the thread is in the overhead state.
1160 ompt_data_t ompt_parallel_data = ompt_data_none;
1161 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1162 if (ompt_enabled.enabled &&
1163 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1165 ompt_task_info_t *parent_task_info;
1166 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1168 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1169 if (ompt_enabled.ompt_callback_parallel_begin) {
1172 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1173 &(parent_task_info->task_data), &(parent_task_info->frame),
1174 &ompt_parallel_data, team_size,
1175 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
// Case 1: the thread is not yet running on its serial team.
1180 if (this_thr->th.th_team != serial_team) {
1182 int level = this_thr->th.th_team->t.t_level;
// If the cached serial team is already in use, allocate a fresh
// one-thread team under __kmp_forkjoin_lock and make it the serial team.
1184 if (serial_team->t.t_serialized) {
1187 kmp_team_t *new_team;
1189 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1192 __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1196 proc_bind, &this_thr->th.th_current_task->td_icvs,
1197 0 USE_NESTED_HOT_ARG(NULL));
1198 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1199 KMP_ASSERT(new_team);
1202 new_team->t.t_threads[0] = this_thr;
1203 new_team->t.t_parent = this_thr->th.th_team;
1204 serial_team = new_team;
1205 this_thr->th.th_serial_team = serial_team;
1209 (
"__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1210 global_tid, serial_team));
1218 (
"__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1219 global_tid, serial_team));
// Initialise the serial team and switch the thread over to it.
1223 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1224 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1225 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1226 serial_team->t.t_ident = loc;
1227 serial_team->t.t_serialized = 1;
1228 serial_team->t.t_nproc = 1;
1229 serial_team->t.t_parent = this_thr->th.th_team;
1230 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1231 this_thr->th.th_team = serial_team;
1232 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
// The current task stops executing and a new current task is pushed for
// the serial team.
1234 KF_TRACE(10, (
"__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1235 this_thr->th.th_current_task));
1236 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1237 this_thr->th.th_current_task->td_flags.executing = 0;
1239 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
// The new task inherits the ICVs of its parent, then picks up per-level
// overrides for nproc and proc_bind when such lists cover level + 1.
1244 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1245 &this_thr->th.th_current_task->td_parent->td_icvs);
1249 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1250 this_thr->th.th_current_task->td_icvs.nproc =
1251 __kmp_nested_nth.nth[level + 1];
1254 if (__kmp_nested_proc_bind.used &&
1255 (level + 1 < __kmp_nested_proc_bind.used)) {
1256 this_thr->th.th_current_task->td_icvs.proc_bind =
1257 __kmp_nested_proc_bind.bind_types[level + 1];
// t_pkfn is set to an all-ones sentinel value.
1261 serial_team->t.t_pkfn = (microtask_t)(~0);
1263 this_thr->th.th_info.ds.ds_tid = 0;
1266 this_thr->th.th_team_nproc = 1;
1267 this_thr->th.th_team_master = this_thr;
1268 this_thr->th.th_team_serialized = 1;
// Nesting level grows by one; the active level is inherited unchanged.
1270 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1271 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1272 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;
1274 propagateFPControl(serial_team);
// Allocate the dispatch buffer on first use.
1277 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1278 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1279 serial_team->t.t_dispatch->th_disp_buffer =
1280 (dispatch_private_info_t *)__kmp_allocate(
1281 sizeof(dispatch_private_info_t));
1283 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
// Case 2: the thread is already on its serial team (nested serialized
// region): bump the serialization count and nesting level, and push one
// more dispatch buffer onto the chain.
1290 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1291 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1292 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1293 ++serial_team->t.t_serialized;
1294 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1297 int level = this_thr->th.th_team->t.t_level;
1300 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1301 this_thr->th.th_current_task->td_icvs.nproc =
1302 __kmp_nested_nth.nth[level + 1];
1304 serial_team->t.t_level++;
1305 KF_TRACE(10, (
"__kmpc_serialized_parallel: T#%d increasing nesting level "
1306 "of serial team %p to %d\n",
1307 global_tid, serial_team, serial_team->t.t_level));
1310 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1312 dispatch_private_info_t *disp_buffer =
1313 (dispatch_private_info_t *)__kmp_allocate(
1314 sizeof(dispatch_private_info_t));
1315 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1316 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1318 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
// Common tail: reset any cancellation request on the serial team.
1322 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
// Display affinity when the level or team size changed since last time.
1326 if (__kmp_display_affinity) {
1327 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1328 this_thr->th.th_prev_num_threads != 1) {
1330 __kmp_aux_display_affinity(global_tid, NULL);
1331 this_thr->th.th_prev_level = serial_team->t.t_level;
1332 this_thr->th.th_prev_num_threads = 1;
1336 if (__kmp_env_consistency_check)
1337 __kmp_push_parallel(global_tid, NULL);
// OMPT: link a lightweight task team for the region and report the start
// of the implicit task.
1339 serial_team->t.ompt_team_info.master_return_address = codeptr;
1340 if (ompt_enabled.enabled &&
1341 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1342 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1343 OMPT_GET_FRAME_ADDRESS(0);
1345 ompt_lw_taskteam_t lw_taskteam;
1346 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1347 &ompt_parallel_data, codeptr);
1349 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1353 if (ompt_enabled.ompt_callback_implicit_task) {
1354 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1355 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1356 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
1357 ompt_task_implicit);
1358 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1359 __kmp_tid_from_gtid(global_tid);
1363 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1364 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1365 OMPT_GET_FRAME_ADDRESS(0);
// Returns true when all of the following hold: the forking thread has a
// teams microtask, ap is non-null, the microtask is not
// __kmp_teams_master, and level equals teams_level.
// NOTE(review): the closing brace is not visible in this excerpt.
1371static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
1372 microtask_t microtask,
int level,
1373 int teams_level, kmp_va_list ap) {
1374 return (master_th->th.th_teams_microtask && ap &&
1375 microtask != (microtask_t)__kmp_teams_master && level == teams_level);
// Returns true in either of two cases: ap is NULL and active_level is 0,
// or ap is non-null and teams_level is positive and equal to level.
// NOTE(review): the closing brace is not visible in this excerpt.
1380static inline bool __kmp_is_entering_teams(
int active_level,
int level,
1381 int teams_level, kmp_va_list ap) {
1382 return ((ap == NULL && active_level == 0) ||
1383 (ap && teams_level > 0 && teams_level == level));
// __kmp_fork_in_teams: fork path that runs the parallel region on parent_team
// itself; no new team is allocated in the visible code. __kmp_fork_call
// returns this function's result when __kmp_is_fork_in_teams() holds.
// NOTE(review): the return type, the end of the parameter list, several
// braces and preprocessor guards are not visible in this excerpt; the comments
// below describe only the statements that are visible.
1390__kmp_fork_in_teams(
ident_t *loc,
int gtid, kmp_team_t *parent_team,
1391 kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
1392 enum fork_context_e call_context, microtask_t microtask,
1393 launch_t invoker,
int master_set_numthreads,
int level,
1395 ompt_data_t ompt_parallel_data,
void *return_address,
// Record the source location on parent_team and copy the argc variadic
// arguments from ap into parent_team's argv storage.
1401 parent_team->t.t_ident = loc;
1402 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1403 parent_team->t.t_argc = argc;
1404 argv = (
void **)parent_team->t.t_argv;
1405 for (i = argc - 1; i >= 0; --i) {
1406 *argv++ = va_arg(kmp_va_deref(ap),
void *);
// Case: parent_team is this thread's serial team. The debug assertion expects
// the serialization count to be greater than 1 at this point.
1409 if (parent_team == master_th->th.th_serial_team) {
1412 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
// GNU-context callers: the serialization count is decremented here
// (presumably followed by an early return; not visible in this excerpt).
1414 if (call_context == fork_context_gnu) {
1417 parent_team->t.t_serialized--;
// Publish the microtask on the team before it is invoked directly below.
1422 parent_team->t.t_pkfn = microtask;
// OMPT bookkeeping before the call: when a tool is enabled, create and link a
// lightweight task team, report implicit-task begin and set the thread state
// to work_parallel. exit_frame_p is also pointed at `dummy` in a branch whose
// condition is not visible here.
1427 void **exit_frame_p;
1428 ompt_data_t *implicit_task_data;
1429 ompt_lw_taskteam_t lw_taskteam;
1431 if (ompt_enabled.enabled) {
1432 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1433 &ompt_parallel_data, return_address);
1434 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1436 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1440 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1441 if (ompt_enabled.ompt_callback_implicit_task) {
1442 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1443 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1444 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
1445 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1449 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1451 exit_frame_p = &dummy;
// Drop one serialization level, then run the microtask on the calling thread
// with the arguments stored in parent_team's argv.
1457 parent_team->t.t_serialized--;
1460 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1461 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1462 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
// After the microtask: clear the exit frame, report implicit-task end and
// parallel end to the tool, unlink the lightweight task team and set the
// thread state to overhead.
1471 if (ompt_enabled.enabled) {
1472 *exit_frame_p = NULL;
1473 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1474 if (ompt_enabled.ompt_callback_implicit_task) {
1475 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1476 ompt_scope_end, NULL, implicit_task_data, 1,
1477 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1479 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1480 __ompt_lw_taskteam_unlink(master_th);
1481 if (ompt_enabled.ompt_callback_parallel_end) {
1482 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1483 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
1484 OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
1486 master_th->th.ompt_thread_info.state = ompt_state_overhead;
// Remaining path (presumably reached when parent_team is not the serial team
// -- TODO confirm against the full source): publish microtask and invoker on
// parent_team, count the new region on the root, and bump both level counters.
1492 parent_team->t.t_pkfn = microtask;
1493 parent_team->t.t_invoke = invoker;
1494 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1495 parent_team->t.t_active_level++;
1496 parent_team->t.t_level++;
1497 parent_team->t.t_def_allocator = master_th->th.th_def_allocator;
// Save parent_team's current thread count in th_teams_size.nth; a requested
// thread count is compared against it below.
1504 master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
// OMPT: link a lightweight task team for this region.
1507 if (ompt_enabled.enabled) {
1508 ompt_lw_taskteam_t lw_taskteam;
1509 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
1511 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1,
true);
// Thread-count request: if it does not exceed th_teams_size.nth, parent_team
// is resized to that count (with extra distributed-barrier handling when that
// barrier pattern is active) and every remaining thread's th_team_nproc is
// updated. The request is cleared afterwards.
1516 if (master_set_numthreads) {
1517 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1519 kmp_info_t **other_threads = parent_team->t.t_threads;
1522 int old_proc = master_th->th.th_teams_size.nth;
1523 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1524 __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
1525 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
1527 parent_team->t.t_nproc = master_set_numthreads;
1528 for (i = 0; i < master_set_numthreads; ++i) {
1529 other_threads[i]->th.th_team_nproc = master_set_numthreads;
1533 master_th->th.th_set_nproc = 0;
// When __kmp_debugging is set, __kmp_omp_num_threads(loc) can supply the
// thread count (the guard around the assignment is not visible here).
1537 if (__kmp_debugging) {
1538 int nth = __kmp_omp_num_threads(loc);
1540 master_set_numthreads = nth;
// Compute the proc_bind policy for this region: start from the per-region
// request, force proc_bind_false when the ICV is false, fall back to the ICV
// when nothing was requested, and pick up a different next-level value from
// the nested proc-bind list as the ICV to propagate.
1546 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1548 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1549 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1550 proc_bind = proc_bind_false;
1553 if (proc_bind == proc_bind_default) {
1554 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
1560 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1561 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1562 master_th->th.th_current_task->td_icvs.proc_bind)) {
1563 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1566 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
// If a new proc_bind ICV applies, write it into the current task ICVs of the
// first th_team_nproc threads of parent_team.
1568 if (proc_bind_icv != proc_bind_default &&
1569 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1570 kmp_info_t **other_threads = parent_team->t.t_threads;
1571 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1572 other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
// Reset the per-region proc_bind request.
1576 master_th->th.th_set_proc_bind = proc_bind_default;
// ITT instrumentation: in frames mode 3, at active level 1 with a single
// team, stamp the frame and region start time; create a stack-caller id when
// that ITT entry point is available.
1578#if USE_ITT_BUILD && USE_ITT_NOTIFY
1579 if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
1581 __kmp_forkjoin_frames_mode == 3 &&
1582 parent_team->t.t_active_level == 1
1583 && master_th->th.th_teams_size.nteams == 1) {
1584 kmp_uint64 tmp_time = __itt_get_timestamp();
1585 master_th->th.th_frame_time = tmp_time;
1586 parent_team->t.t_region_time = tmp_time;
1588 if (__itt_stack_caller_create_ptr) {
1589 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
1591 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
// Re-partition places for parent_team when affinity is supported.
1594#if KMP_AFFINITY_SUPPORTED
1595 __kmp_partition_places(parent_team);
// Release the team's threads into the region via __kmp_internal_fork, with
// trace output before and after.
1598 KF_TRACE(10, (
"__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
1599 "master_th=%p, gtid=%d\n",
1600 root, parent_team, master_th, gtid));
1601 __kmp_internal_fork(loc, gtid, parent_team);
1602 KF_TRACE(10, (
"__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
1603 "master_th=%p, gtid=%d\n",
1604 root, parent_team, master_th, gtid));
// GNU-context callers are handled separately here (the statement controlled
// by this condition is not visible in this excerpt).
1606 if (call_context == fork_context_gnu)
// Run the primary thread's share through the team's invoker; a zero result
// is treated as a fatal assertion failure.
1610 KA_TRACE(20, (
"__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
1611 parent_team->t.t_id, parent_team->t.t_pkfn));
1613 if (!parent_team->t.t_invoke(gtid)) {
1614 KMP_ASSERT2(0,
"cannot invoke microtask for PRIMARY thread");
1616 KA_TRACE(20, (
"__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
1617 parent_team->t.t_id, parent_team->t.t_pkfn));
1620 KA_TRACE(20, (
"__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
// __kmp_serial_fork_call: handles a parallel region that is executed by the
// encountering thread alone. __kmp_fork_call returns this function's result
// when the computed thread count is 1.
// NOTE(review): the return type, the end of the parameter list, several
// braces, conditions and preprocessor guards are not visible in this excerpt;
// the comments below describe only the statements that are visible.
1627__kmp_serial_fork_call(
ident_t *loc,
int gtid,
enum fork_context_e call_context,
1628 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1629 kmp_info_t *master_th, kmp_team_t *parent_team,
1631 ompt_data_t *ompt_parallel_data,
void **return_address,
1632 ompt_data_t **parent_task_data,
// On the listed Linux architectures the argument array `args` is taken from
// the stack via KMP_ALLOCA, sized for argc pointers.
1640#if KMP_OS_LINUX && \
1641 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
1644 void **args = (
void **)KMP_ALLOCA(argc *
sizeof(
void *));
1649 20, (
"__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));
// Record the microtask on the thread's serial team.
1654 master_th->th.th_serial_team->t.t_pkfn = microtask;
// Intel-context entry: record the source location on the serial team. The
// t_level decrement below sits under a condition that is not visible here.
1657 if (call_context == fork_context_intel) {
1659 master_th->th.th_serial_team->t.t_ident = loc;
1662 master_th->th.th_serial_team->t.t_level--;
// OMPT bookkeeping before the call: link a lightweight task team, report
// implicit-task begin and set the state to work_parallel. exit_frame_p is
// also pointed at `dummy` in a branch whose condition is not visible here.
1667 void **exit_frame_p;
1668 ompt_task_info_t *task_info;
1669 ompt_lw_taskteam_t lw_taskteam;
1671 if (ompt_enabled.enabled) {
1672 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1673 ompt_parallel_data, *return_address);
1675 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1677 task_info = OMPT_CUR_TASK_INFO(master_th);
1678 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1679 if (ompt_enabled.ompt_callback_implicit_task) {
1680 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1681 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1682 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1683 &(task_info->task_data), 1,
1684 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1688 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1690 exit_frame_p = &dummy;
// Run the microtask on this thread using the arguments already stored in
// parent_team's argv.
1695 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1696 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1697 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
// After the microtask: clear the exit frame, report implicit-task end and
// parallel end, unlink the lightweight task team, set the state to overhead.
1706 if (ompt_enabled.enabled) {
1707 *exit_frame_p = NULL;
1708 if (ompt_enabled.ompt_callback_implicit_task) {
1709 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1710 ompt_scope_end, NULL, &(task_info->task_data), 1,
1711 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1713 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1714 __ompt_lw_taskteam_unlink(master_th);
1715 if (ompt_enabled.ompt_callback_parallel_end) {
1716 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1717 ompt_parallel_data, *parent_task_data,
1718 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1720 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1723 }
else if (microtask == (microtask_t)__kmp_teams_master) {
// Teams-master microtask: the thread's current team must be its serial team.
// Store the invoker and fill the team's argv. Two copy loops are visible,
// one from the va_list and one from parent_team's argv; the condition that
// selects between them is not visible in this excerpt.
1724 KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
1725 team = master_th->th.th_team;
1727 team->t.t_invoke = invoker;
1728 __kmp_alloc_argv_entries(argc, team, TRUE);
1729 team->t.t_argc = argc;
1730 argv = (
void **)team->t.t_argv;
1732 for (i = argc - 1; i >= 0; --i)
1733 *argv++ = va_arg(kmp_va_deref(ap),
void *);
1735 for (i = 0; i < argc; ++i)
1737 argv[i] = parent_team->t.t_argv[i];
// OMPT: report the end of the task as ompt_task_initial with team size 0 and
// the end of the region flagged as ompt_parallel_league.
1745 if (ompt_enabled.enabled) {
1746 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
1747 if (ompt_enabled.ompt_callback_implicit_task) {
1748 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1749 ompt_scope_end, NULL, &(task_info->task_data), 0,
1750 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1752 if (ompt_enabled.ompt_callback_parallel_end) {
1753 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1754 ompt_parallel_data, *parent_task_data,
1755 OMPT_INVOKER(call_context) | ompt_parallel_league,
1758 master_th->th.ompt_thread_info.state = ompt_state_overhead;
// Remaining microtasks: copy the variadic arguments through argv
// (presumably pointing at `args`, which is what is passed to the microtask
// below -- the assignment is not visible here).
1763 for (i = argc - 1; i >= 0; --i)
1764 *argv++ = va_arg(kmp_va_deref(ap),
void *);
// OMPT bookkeeping before the call, same shape as the first branch.
1769 void **exit_frame_p;
1770 ompt_task_info_t *task_info;
1771 ompt_lw_taskteam_t lw_taskteam;
1772 ompt_data_t *implicit_task_data;
1774 if (ompt_enabled.enabled) {
1775 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1776 ompt_parallel_data, *return_address);
1777 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1779 task_info = OMPT_CUR_TASK_INFO(master_th);
1780 exit_frame_p = &(task_info->frame.exit_frame.ptr);
1783 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1784 if (ompt_enabled.ompt_callback_implicit_task) {
1785 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1786 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1787 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1788 ompt_task_implicit);
1789 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1793 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
1795 exit_frame_p = &dummy;
// Run the microtask on this thread with the locally collected `args`.
1800 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1801 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1802 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
// After the microtask: report implicit-task end and parallel end, unlink the
// lightweight task team, set the state to overhead.
1811 if (ompt_enabled.enabled) {
1812 *exit_frame_p = NULL;
1813 if (ompt_enabled.ompt_callback_implicit_task) {
1814 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1815 ompt_scope_end, NULL, &(task_info->task_data), 1,
1816 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1819 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1820 __ompt_lw_taskteam_unlink(master_th);
1821 if (ompt_enabled.ompt_callback_parallel_end) {
1822 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1823 ompt_parallel_data, *parent_task_data,
1824 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1826 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1830 }
else if (call_context == fork_context_gnu) {
// GNU-context entry: no microtask is invoked in the visible code. With OMPT
// enabled a lightweight task team is linked with an empty exit frame.
1832 if (ompt_enabled.enabled) {
1833 ompt_lw_taskteam_t lwt;
1834 __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
1837 lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
1838 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1844 KA_TRACE(20, (
"__kmp_serial_fork_call: T#%d serial exit\n", gtid));
// Any other call_context value is rejected by this assertion.
1847 KMP_ASSERT2(call_context < fork_context_last,
1848 "__kmp_serial_fork_call: unknown fork_context parameter");
1851 KA_TRACE(20, (
"__kmp_serial_fork_call: T#%d serial exit\n", gtid));
// __kmp_fork_call: entry point that starts a parallel region for thread gtid.
// Visible parameters: source location `loc`, global thread id `gtid`, the
// calling convention `call_context`, the microtask with its `argc` argument
// count, and the `invoker` used to launch it. Returns int.
// Visible flow: snapshot the caller's state, notify OMPT, take the teams or
// serialized shortcuts when they apply, otherwise allocate and initialise a
// team under __kmp_forkjoin_lock, fork its threads and run the primary
// thread's share through the team's invoker.
// NOTE(review): the end of the parameter list, several declarations, braces,
// conditions and preprocessor guards are not visible in this excerpt; the
// comments below describe only the statements that are visible.
1858int __kmp_fork_call(
ident_t *loc,
int gtid,
1859 enum fork_context_e call_context,
1860 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1865 int master_this_cons;
1867 kmp_team_t *parent_team;
1868 kmp_info_t *master_th;
1872 int master_set_numthreads;
1876#if KMP_NESTED_HOT_TEAMS
1877 kmp_hot_team_ptr_t **p_hot_teams;
1880 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
1883 KA_TRACE(20, (
"__kmp_fork_call: enter T#%d\n", gtid));
// Optional stack padding: allocate __kmp_stkpadding bytes on the stack when
// padding is configured and this gtid has a root. The conditional update
// below reads `dummy` (presumably so the allocation is not optimised away).
1884 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1887 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1889 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1890 __kmp_stkpadding += (short)((kmp_int64)dummy);
// Make sure parallel initialisation has run and undo a soft pause.
1896 if (!TCR_4(__kmp_init_parallel))
1897 __kmp_parallel_initialize();
1898 __kmp_resume_if_soft_paused();
// Snapshot the encountering thread's team, tid, construct counter, root,
// root activity flag and pending thread-count request.
1903 master_th = __kmp_threads[gtid];
1905 parent_team = master_th->th.th_team;
1906 master_tid = master_th->th.th_info.ds.ds_tid;
1907 master_this_cons = master_th->th.th_local.this_construct;
1908 root = master_th->th.th_root;
1909 master_active = root->r.r_active;
1910 master_set_numthreads = master_th->th.th_set_nproc;
// OMPT: fetch the parent task data and frame, and the return address.
1913 ompt_data_t ompt_parallel_data = ompt_data_none;
1914 ompt_data_t *parent_task_data;
1915 ompt_frame_t *ompt_frame;
1916 void *return_address = NULL;
1918 if (ompt_enabled.enabled) {
1919 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1921 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1926 __kmp_assign_root_init_mask();
// Nesting information of the enclosing region.
1929 level = parent_team->t.t_level;
1931 active_level = parent_team->t.t_active_level;
1933 teams_level = master_th->th.th_teams_level;
// Nested hot teams: lazily allocate this thread's hot-team table and seed
// entry 0 with the root's hot team.
1934#if KMP_NESTED_HOT_TEAMS
1935 p_hot_teams = &master_th->th.th_hot_teams;
1936 if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
1937 *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
1938 sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
1939 (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
1941 (*p_hot_teams)[0].hot_team_nth = 1;
// OMPT parallel-begin: the reported team size is the explicit request if
// any, else the nproc ICV; the region is flagged as a league when the
// microtask is __kmp_teams_master, otherwise as a team.
1946 if (ompt_enabled.enabled) {
1947 if (ompt_enabled.ompt_callback_parallel_begin) {
1948 int team_size = master_set_numthreads
1949 ? master_set_numthreads
1950 : get__nproc_2(parent_team, master_tid);
1951 int flags = OMPT_INVOKER(call_context) |
1952 ((microtask == (microtask_t)__kmp_teams_master)
1953 ? ompt_parallel_league
1954 : ompt_parallel_team);
1955 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1956 parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,
1959 master_th->th.ompt_thread_info.state = ompt_state_overhead;
1963 master_th->th.th_ident = loc;
// Shortcut: a parallel region at the teams level is handled entirely by
// __kmp_fork_in_teams.
1966 if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {
1967 return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,
1968 call_context, microtask, invoker,
1969 master_set_numthreads, level,
1971 ompt_parallel_data, return_address,
1977 if (__kmp_tasking_mode != tskm_immediate_exec) {
1978 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
1979 parent_team->t.t_task_team[master_th->th.th_task_state]);
1989 __kmp_is_entering_teams(active_level, level, teams_level, ap);
// Serialize the region when the max_active_levels ICV is already reached
// (unless entering teams) or the library runs in serial mode.
1990 if ((!enter_teams &&
1991 (parent_team->t.t_active_level >=
1992 master_th->th.th_current_task->td_icvs.max_active_levels)) ||
1993 (__kmp_library == library_serial)) {
1994 KC_TRACE(10, (
"__kmp_fork_call: T#%d serializing team\n", gtid));
// Otherwise start from the explicit request, or the nproc ICV if none.
1997 nthreads = master_set_numthreads
1998 ? master_set_numthreads
2000 : get__nproc_2(parent_team, master_tid);
// Take the fork/join lock and reserve threads. If only one thread is
// granted, the lock is released again.
2005 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2010 nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
2011 nthreads, enter_teams);
2012 if (nthreads == 1) {
2016 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2020 KMP_DEBUG_ASSERT(nthreads > 0);
// The thread-count request has been consumed.
2023 master_th->th.th_set_nproc = 0;
// Single-thread regions are delegated to __kmp_serial_fork_call.
2025 if (nthreads == 1) {
2026 return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,
2027 invoker, master_th, parent_team,
2029 &ompt_parallel_data, &return_address,
2037 KF_TRACE(10, (
"__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
2038 "curtask=%p, curtask_max_aclevel=%d\n",
2039 parent_team->t.t_active_level, master_th,
2040 master_th->th.th_current_task,
2041 master_th->th.th_current_task->td_icvs.max_active_levels));
// Mark the encountering task as not executing while the region runs.
2045 master_th->th.th_current_task->td_flags.executing = 0;
// Count the region on the root unless this is at or above the teams level
// of a teams construct.
2047 if (!master_th->th.th_teams_microtask || level > teams_level) {
2049 KMP_ATOMIC_INC(&root->r.r_in_parallel);
// nproc ICV for the new team: take the nested list entry for the next level
// when one exists and differs from the current ICV.
2053 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2054 if ((level + 1 < __kmp_nested_nth.used) &&
2055 (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
2056 nthreads_icv = __kmp_nested_nth.nth[level + 1];
// proc_bind for the new team: per-region request, forced to false when the
// ICV is false, else the ICV when nothing was requested; __kmp_teams_proc_bind
// is used for the teams master microtask. A differing next-level entry in
// the nested proc-bind list becomes the ICV to propagate, except for the
// teams cases tested below.
2062 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2064 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
2065 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
2066 proc_bind = proc_bind_false;
2070 if (proc_bind == proc_bind_default) {
2071 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
2074 if (master_th->th.th_teams_microtask &&
2075 microtask == (microtask_t)__kmp_teams_master) {
2076 proc_bind = __kmp_teams_proc_bind;
2082 if ((level + 1 < __kmp_nested_proc_bind.used) &&
2083 (__kmp_nested_proc_bind.bind_types[level + 1] !=
2084 master_th->th.th_current_task->td_icvs.proc_bind)) {
2087 if (!master_th->th.th_teams_microtask ||
2088 !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
2089 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
// Reset the per-region proc_bind request.
2094 master_th->th.th_set_proc_bind = proc_bind_default;
// Allocate the team. If an ICV changes for the new level, a modified copy of
// the current task ICVs is passed; otherwise the current task ICVs are passed
// unchanged. With the distributed barrier the ICVs are also copied into the
// team's barrier structure.
2096 if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
2097 kmp_internal_control_t new_icvs;
2098 copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
2099 new_icvs.next = NULL;
2100 if (nthreads_icv > 0) {
2101 new_icvs.nproc = nthreads_icv;
2103 if (proc_bind_icv != proc_bind_default) {
2104 new_icvs.proc_bind = proc_bind_icv;
2108 KF_TRACE(10, (
"__kmp_fork_call: before __kmp_allocate_team\n"));
2109 team = __kmp_allocate_team(root, nthreads, nthreads,
2113 proc_bind, &new_icvs,
2114 argc USE_NESTED_HOT_ARG(master_th));
2115 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2116 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);
2119 KF_TRACE(10, (
"__kmp_fork_call: before __kmp_allocate_team\n"));
2120 team = __kmp_allocate_team(root, nthreads, nthreads,
2125 &master_th->th.th_current_task->td_icvs,
2126 argc USE_NESTED_HOT_ARG(master_th));
2127 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
2128 copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
2129 &master_th->th.th_current_task->td_icvs);
2132 10, (
"__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));
// Fill in the team descriptor from the caller's snapshot.
2135 KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
2136 KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
2137 KMP_CHECK_UPDATE(team->t.t_ident, loc);
2138 KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
2139 KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
2141 KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
2144 KMP_CHECK_UPDATE(team->t.t_invoke, invoker);
// Level counters: one deeper than the parent, except at or above the teams
// level of a teams construct, where the parent's levels are kept.
2146 if (!master_th->th.th_teams_microtask || level > teams_level) {
2147 int new_level = parent_team->t.t_level + 1;
2148 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2149 new_level = parent_team->t.t_active_level + 1;
2150 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
2153 int new_level = parent_team->t.t_level;
2154 KMP_CHECK_UPDATE(team->t.t_level, new_level);
2155 new_level = parent_team->t.t_active_level;
2156 KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
// Schedule, cancellation state and default allocator for the new team.
2158 kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
2160 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
2162 KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
2163 KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
2166 propagateFPControl(team);
// OMPD breakpoint hook for parallel-begin.
2168 if (ompd_state & OMPD_ENABLE_BP)
2169 ompd_bp_parallel_begin();
// Tasking: save the primary thread's th_task_state on its memo stack (when
// nested or a task team exists), doubling the stack when it is full, then
// choose the task state for the new region.
2172 if (__kmp_tasking_mode != tskm_immediate_exec) {
2175 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2176 parent_team->t.t_task_team[master_th->th.th_task_state]);
2177 KA_TRACE(20, (
"__kmp_fork_call: Primary T#%d pushing task_team %p / team "
2178 "%p, new task_team %p / team %p\n",
2179 __kmp_gtid_from_thread(master_th),
2180 master_th->th.th_task_team, parent_team,
2181 team->t.t_task_team[master_th->th.th_task_state], team));
2183 if (active_level || master_th->th.th_task_team) {
2185 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2186 if (master_th->th.th_task_state_top >=
2187 master_th->th.th_task_state_stack_sz) {
2188 kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
2189 kmp_uint8 *old_stack, *new_stack;
2191 new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
2192 for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
2193 new_stack[i] = master_th->th.th_task_state_memo_stack[i];
2195 for (i = master_th->th.th_task_state_stack_sz; i < new_size;
2199 old_stack = master_th->th.th_task_state_memo_stack;
2200 master_th->th.th_task_state_memo_stack = new_stack;
2201 master_th->th.th_task_state_stack_sz = new_size;
2202 __kmp_free(old_stack);
2206 .th_task_state_memo_stack[master_th->th.th_task_state_top] =
2207 master_th->th.th_task_state;
2208 master_th->th.th_task_state_top++;
2209#if KMP_NESTED_HOT_TEAMS
2210 if (master_th->th.th_hot_teams &&
2211 active_level < __kmp_hot_teams_max_level &&
2212 team == master_th->th.th_hot_teams[active_level].hot_team) {
2214 master_th->th.th_task_state =
2216 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2219 master_th->th.th_task_state = 0;
2220#if KMP_NESTED_HOT_TEAMS
2224#if !KMP_NESTED_HOT_TEAMS
2225 KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
2226 (team == root->r.r_hot_team));
2232 (
"__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2233 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
2235 KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
2236 (team->t.t_master_tid == 0 &&
2237 (team->t.t_parent == root->r.r_root_team ||
2238 team->t.t_parent->t.t_serialized)));
// Fill the team's argv. Two copy loops are visible, one from the va_list and
// one from the parent team's argv; the condition that selects between them is
// not visible in this excerpt.
2242 argv = (
void **)team->t.t_argv;
2244 for (i = argc - 1; i >= 0; --i) {
2245 void *new_argv = va_arg(kmp_va_deref(ap),
void *);
2246 KMP_CHECK_UPDATE(*argv, new_argv);
2250 for (i = 0; i < argc; ++i) {
2252 KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
// Remember the previous root activity on the team and mark the root active.
2257 KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
2258 if (!root->r.r_active)
2259 root->r.r_active = TRUE;
// Set up the team's threads and their ICV copies.
2261 __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
2262 __kmp_setup_icv_copy(team, nthreads,
2263 &master_th->th.th_current_task->td_icvs, loc);
2266 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
// Team setup is complete; drop the fork/join lock.
2269 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
// ITT instrumentation for outermost regions outside teams: stamp frame and
// region times in frames modes 1 and 3, or report the region as forking.
2272 if (team->t.t_active_level == 1
2273 && !master_th->th.th_teams_microtask) {
2275 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2276 (__kmp_forkjoin_frames_mode == 3 ||
2277 __kmp_forkjoin_frames_mode == 1)) {
2278 kmp_uint64 tmp_time = 0;
2279 if (__itt_get_timestamp_ptr)
2280 tmp_time = __itt_get_timestamp();
2282 master_th->th.th_frame_time = tmp_time;
2283 if (__kmp_forkjoin_frames_mode == 3)
2284 team->t.t_region_time = tmp_time;
2288 if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
2289 __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
2291 __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
// By now the primary thread must already belong to the new team.
2297 KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
2300 (
"__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
2301 root, team, master_th, gtid));
// ITT stack-caller ids: one for the new team, and one for a serialized
// parent team in the else-branch (the condition on the first branch is only
// partly visible in this excerpt).
2304 if (__itt_stack_caller_create_ptr) {
2307 KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
2308 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2309 }
else if (parent_team->t.t_serialized) {
2314 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
2315 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
// Release the team's threads into the region.
2323 __kmp_internal_fork(loc, gtid, team);
2324 KF_TRACE(10, (
"__kmp_internal_fork : after : root=%p, team=%p, "
2325 "master_th=%p, gtid=%d\n",
2326 root, team, master_th, gtid));
// GNU-context callers leave here (per the trace text; the return statement
// itself is not visible in this excerpt).
2329 if (call_context == fork_context_gnu) {
2330 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
2335 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
2336 team->t.t_id, team->t.t_pkfn));
// Run the primary thread's share through the team's invoker, with the stats
// thread state switched around the call when stats are enabled. A zero
// result from the invoker is treated as a fatal assertion failure.
2339#if KMP_STATS_ENABLED
2343 KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);
2347 if (!team->t.t_invoke(gtid)) {
2348 KMP_ASSERT2(0,
"cannot invoke microtask for PRIMARY thread");
2351#if KMP_STATS_ENABLED
2354 KMP_SET_THREAD_STATE(previous_state);
2358 KA_TRACE(20, (
"__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
2359 team->t.t_id, team->t.t_pkfn));
2362 KA_TRACE(20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid));
// OMPT: back to overhead state once the primary thread's work is done.
2364 if (ompt_enabled.enabled) {
2365 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2373static inline void __kmp_join_restore_state(kmp_info_t *thread,
2376 thread->th.ompt_thread_info.state =
2377 ((team->t.t_serialized) ? ompt_state_work_serial
2378 : ompt_state_work_parallel);
2381static inline void __kmp_join_ompt(
int gtid, kmp_info_t *thread,
2382 kmp_team_t *team, ompt_data_t *parallel_data,
2383 int flags,
void *codeptr) {
2384 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2385 if (ompt_enabled.ompt_callback_parallel_end) {
2386 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
2387 parallel_data, &(task_info->task_data), flags, codeptr);
2390 task_info->frame.enter_frame = ompt_data_none;
2391 __kmp_join_restore_state(thread, team);
2395void __kmp_join_call(
ident_t *loc,
int gtid
2398 enum fork_context_e fork_context
2402 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2404 kmp_team_t *parent_team;
2405 kmp_info_t *master_th;
2409 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2412 master_th = __kmp_threads[gtid];
2413 root = master_th->th.th_root;
2414 team = master_th->th.th_team;
2415 parent_team = team->t.t_parent;
2417 master_th->th.th_ident = loc;
2420 void *team_microtask = (
void *)team->t.t_pkfn;
2424 if (ompt_enabled.enabled &&
2425 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2426 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2431 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2432 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2433 "th_task_team = %p\n",
2434 __kmp_gtid_from_thread(master_th), team,
2435 team->t.t_task_team[master_th->th.th_task_state],
2436 master_th->th.th_task_team));
2437 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2438 team->t.t_task_team[master_th->th.th_task_state]);
2442 if (team->t.t_serialized) {
2443 if (master_th->th.th_teams_microtask) {
2445 int level = team->t.t_level;
2446 int tlevel = master_th->th.th_teams_level;
2447 if (level == tlevel) {
2451 }
else if (level == tlevel + 1) {
2455 team->t.t_serialized++;
2461 if (ompt_enabled.enabled) {
2462 if (fork_context == fork_context_gnu) {
2463 __ompt_lw_taskteam_unlink(master_th);
2465 __kmp_join_restore_state(master_th, parent_team);
2472 master_active = team->t.t_master_active;
2477 __kmp_internal_join(loc, gtid, team);
2479 if (__itt_stack_caller_create_ptr) {
2480 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2482 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2483 team->t.t_stack_id = NULL;
2487 master_th->th.th_task_state =
2490 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2491 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2495 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2496 parent_team->t.t_stack_id = NULL;
2504 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2505 void *codeptr = team->t.ompt_team_info.master_return_address;
2510 if (team->t.t_active_level == 1 &&
2511 (!master_th->th.th_teams_microtask ||
2512 master_th->th.th_teams_size.nteams == 1)) {
2513 master_th->th.th_ident = loc;
2516 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2517 __kmp_forkjoin_frames_mode == 3)
2518 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2519 master_th->th.th_frame_time, 0, loc,
2520 master_th->th.th_team_nproc, 1);
2521 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2522 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2523 __kmp_itt_region_joined(gtid);
2527#if KMP_AFFINITY_SUPPORTED
2530 master_th->th.th_first_place = team->t.t_first_place;
2531 master_th->th.th_last_place = team->t.t_last_place;
2535 if (master_th->th.th_teams_microtask && !exit_teams &&
2536 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2537 team->t.t_level == master_th->th.th_teams_level + 1) {
2542 ompt_data_t ompt_parallel_data = ompt_data_none;
2543 if (ompt_enabled.enabled) {
2544 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2545 if (ompt_enabled.ompt_callback_implicit_task) {
2546 int ompt_team_size = team->t.t_nproc;
2547 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2548 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2549 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2551 task_info->frame.exit_frame = ompt_data_none;
2552 task_info->task_data = ompt_data_none;
2553 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2554 __ompt_lw_taskteam_unlink(master_th);
2559 team->t.t_active_level--;
2560 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2566 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2567 int old_num = master_th->th.th_team_nproc;
2568 int new_num = master_th->th.th_teams_size.nth;
2569 kmp_info_t **other_threads = team->t.t_threads;
2570 team->t.t_nproc = new_num;
2571 for (
int i = 0; i < old_num; ++i) {
2572 other_threads[i]->th.th_team_nproc = new_num;
2575 for (
int i = old_num; i < new_num; ++i) {
2577 KMP_DEBUG_ASSERT(other_threads[i]);
2578 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2579 for (
int b = 0; b < bs_last_barrier; ++b) {
2580 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2581 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2583 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2586 if (__kmp_tasking_mode != tskm_immediate_exec) {
2588 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2594 if (ompt_enabled.enabled) {
2595 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2596 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2604 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2605 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2607 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2612 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2614 if (!master_th->th.th_teams_microtask ||
2615 team->t.t_level > master_th->th.th_teams_level) {
2617 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2619 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2622 if (ompt_enabled.enabled) {
2623 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2624 if (ompt_enabled.ompt_callback_implicit_task) {
2625 int flags = (team_microtask == (
void *)__kmp_teams_master)
2627 : ompt_task_implicit;
2628 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2629 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2630 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2631 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2633 task_info->frame.exit_frame = ompt_data_none;
2634 task_info->task_data = ompt_data_none;
2638 KF_TRACE(10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
2640 __kmp_pop_current_task_from_thread(master_th);
2642 master_th->th.th_def_allocator = team->t.t_def_allocator;
2645 if (ompd_state & OMPD_ENABLE_BP)
2646 ompd_bp_parallel_end();
2648 updateHWFPControl(team);
2650 if (root->r.r_active != master_active)
2651 root->r.r_active = master_active;
2653 __kmp_free_team(root, team USE_NESTED_HOT_ARG(
2661 master_th->th.th_team = parent_team;
2662 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2663 master_th->th.th_team_master = parent_team->t.t_threads[0];
2664 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2667 if (parent_team->t.t_serialized &&
2668 parent_team != master_th->th.th_serial_team &&
2669 parent_team != root->r.r_root_team) {
2670 __kmp_free_team(root,
2671 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2672 master_th->th.th_serial_team = parent_team;
2675 if (__kmp_tasking_mode != tskm_immediate_exec) {
2676 if (master_th->th.th_task_state_top >
2678 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2680 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2681 master_th->th.th_task_state;
2682 --master_th->th.th_task_state_top;
2684 master_th->th.th_task_state =
2686 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2687 }
else if (team != root->r.r_hot_team) {
2692 master_th->th.th_task_state = 0;
2695 master_th->th.th_task_team =
2696 parent_team->t.t_task_team[master_th->th.th_task_state];
2698 (
"__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2699 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
2706 master_th->th.th_current_task->td_flags.executing = 1;
2708 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2710#if KMP_AFFINITY_SUPPORTED
2711 if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
2712 __kmp_reset_root_init_mask(gtid);
2717 OMPT_INVOKER(fork_context) |
2718 ((team_microtask == (
void *)__kmp_teams_master) ? ompt_parallel_league
2719 : ompt_parallel_team);
2720 if (ompt_enabled.enabled) {
2721 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,
2727 KA_TRACE(20, (
"__kmp_join_call: exit T#%d\n", gtid));
// Push a snapshot of the thread's current-task ICVs onto its team's
// internal-control stack (t_control_stack_top).
//   thread - the thread whose ICVs are being saved.
2732void __kmp_save_internal_controls(kmp_info_t *thread) {
// Compare the thread's current team with its serial team.
2734 if (thread->th.th_team != thread->th.th_serial_team) {
// Applies when the team's serialization depth exceeds 1.
2737 if (thread->th.th_team->t.t_serialized > 1) {
// Case: the control stack is empty.
2740 if (thread->th.th_team->t.t_control_stack_top == NULL) {
// Case: the top record was saved at a different serialization depth.
2743 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2744 thread->th.th_team->t.t_serialized) {
// Allocate a new record and fill it from the current task's ICVs.
2749 kmp_internal_control_t *control =
2750 (kmp_internal_control_t *)__kmp_allocate(
2751 sizeof(kmp_internal_control_t));
2753 copy_icvs(control, &thread->th.th_current_task->td_icvs);
// Tag the record with the serialization depth it belongs to.
2755 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
// Link the record in as the new top of the team's control stack.
2757 control->next = thread->th.th_team->t.t_control_stack_top;
2758 thread->th.th_team->t.t_control_stack_top = control;
// Set the nproc ICV of thread `gtid` to `new_nth` and, when the root is idle
// and its hot team is larger than the new value, release the surplus hot-team
// threads.
//   new_nth - requested number of threads; values above __kmp_max_nth are
//             clamped to __kmp_max_nth.
//   gtid    - global thread id indexing __kmp_threads.
2764void __kmp_set_num_threads(
int new_nth,
int gtid) {
2768 KF_TRACE(10, (
"__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2769 KMP_DEBUG_ASSERT(__kmp_init_serial);
// Upper clamp of the request.
2773 else if (new_nth > __kmp_max_nth)
2774 new_nth = __kmp_max_nth;
2777 thread = __kmp_threads[gtid];
// Check whether the ICV already holds the requested value.
2778 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
// Save the current ICVs before overwriting nproc.
2781 __kmp_save_internal_controls(thread);
2783 set__nproc(thread, new_nth);
// Shrink the hot team only if the parallel runtime is initialized, the root
// is not active, and the hot team currently has more threads than requested.
2788 root = thread->th.th_root;
2789 if (__kmp_init_parallel && (!root->r.r_active) &&
2790 (root->r.r_hot_team->t.t_nproc > new_nth)
2791#
if KMP_NESTED_HOT_TEAMS
2792 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2795 kmp_team_t *hot_team = root->r.r_hot_team;
// The hot team is modified under the fork/join bootstrap lock.
2798 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
// The distributed barrier needs to be resized before threads are removed.
2800 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2801 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
// Release every hot-team thread at index >= new_nth.
2804 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2805 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2806 if (__kmp_tasking_mode != tskm_immediate_exec) {
// Detach the thread from its task team before freeing it.
2809 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2811 __kmp_free_thread(hot_team->t.t_threads[f]);
2812 hot_team->t.t_threads[f] = NULL;
2814 hot_team->t.t_nproc = new_nth;
2815#if KMP_NESTED_HOT_TEAMS
// Keep the per-thread hot-team bookkeeping for level 0 in sync.
2816 if (thread->th.th_hot_teams) {
2817 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2818 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
// Tell the distributed barrier about the new team size.
2822 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2823 hot_team->t.b->update_num_threads(new_nth);
2824 __kmp_add_threads_to_team(hot_team, new_nth);
2827 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
// Propagate the new team size to the threads that remain in the hot team.
2830 for (f = 0; f < new_nth; f++) {
2831 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2832 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
// Flag the size change on the hot team (stored as -1).
2835 hot_team->t.t_size_changed = -1;
// Validate `max_active_levels` and store it as the max-active-levels ICV of
// thread `gtid`.
//   gtid              - global thread id indexing __kmp_threads.
//   max_active_levels - requested value; negative values produce a warning
//                       (the trace text says the call is ignored), values
//                       above KMP_MAX_ACTIVE_LEVELS_LIMIT produce a warning
//                       and are clamped to the limit.
2840void __kmp_set_max_active_levels(
int gtid,
int max_active_levels) {
2843 KF_TRACE(10, (
"__kmp_set_max_active_levels: new max_active_levels for thread "
2845 gtid, max_active_levels));
2846 KMP_DEBUG_ASSERT(__kmp_init_serial);
// Negative input: warn and trace that the call is ignored.
2849 if (max_active_levels < 0) {
2850 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2855 KF_TRACE(10, (
"__kmp_set_max_active_levels: the call is ignored: new "
2856 "max_active_levels for thread %d = (%d)\n",
2857 gtid, max_active_levels));
// Values within the limit are accepted; larger ones are clamped with a warning.
2860 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2865 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2866 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2867 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2873 KF_TRACE(10, (
"__kmp_set_max_active_levels: after validation: new "
2874 "max_active_levels for thread %d = (%d)\n",
2875 gtid, max_active_levels));
2877 thread = __kmp_threads[gtid];
// Save the current ICVs before overwriting the value.
2879 __kmp_save_internal_controls(thread);
2881 set__max_active_levels(thread, max_active_levels);
// Return the max-active-levels ICV of thread `gtid`, read from the thread's
// current task (td_icvs.max_active_levels).
//   gtid - global thread id indexing __kmp_threads.
2885int __kmp_get_max_active_levels(
int gtid) {
2888 KF_TRACE(10, (
"__kmp_get_max_active_levels: thread %d\n", gtid));
2889 KMP_DEBUG_ASSERT(__kmp_init_serial);
2891 thread = __kmp_threads[gtid];
// The ICVs live in the current task, so it must exist.
2892 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2893 KF_TRACE(10, (
"__kmp_get_max_active_levels: thread %d, curtask=%p, "
2894 "curtask_maxaclevel=%d\n",
2895 gtid, thread->th.th_current_task,
2896 thread->th.th_current_task->td_icvs.max_active_levels));
2897 return thread->th.th_current_task->td_icvs.max_active_levels;
// Record `num_teams` in the global __kmp_nteams setting.
2901void __kmp_set_num_teams(
int num_teams) {
2903 __kmp_nteams = num_teams;
2905int __kmp_get_max_teams(
void) {
return __kmp_nteams; }
// Record `limit` in the global __kmp_teams_thread_limit setting.
2907void __kmp_set_teams_thread_limit(
int limit) {
2909 __kmp_teams_thread_limit = limit;
2911int __kmp_get_teams_thread_limit(
void) {
return __kmp_teams_thread_limit; }
// Build-time checks: kmp_sched_t and enum sched_type must both have the same
// size as int.
2913KMP_BUILD_ASSERT(
sizeof(kmp_sched_t) ==
sizeof(
int));
2914KMP_BUILD_ASSERT(
sizeof(
enum sched_type) ==
sizeof(
int));
// Set the run-time schedule ICV (td_icvs.sched) of thread `gtid`.
//   gtid  - global thread id indexing __kmp_threads.
//   kind  - requested schedule kind, possibly carrying modifiers.
//   chunk - requested chunk size; see the chunk handling at the end.
2917void __kmp_set_schedule(
int gtid, kmp_sched_t kind,
int chunk) {
// NOTE(review): presumably holds the caller's kind including modifiers so
// they can be re-applied below; confirm where it is assigned.
2919 kmp_sched_t orig_kind;
2922 KF_TRACE(10, (
"__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2923 gtid, (
int)kind, chunk));
2924 KMP_DEBUG_ASSERT(__kmp_init_serial);
// Work with the bare kind, without modifier bits.
2931 kind = __kmp_sched_without_mods(kind);
// Reject kinds outside (kmp_sched_lower, kmp_sched_upper) and kinds in the gap
// between the standard range and the extension range: warn and fall back to
// the default kind.
2933 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2934 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2936 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2937 KMP_HNT(DefaultScheduleKindUsed,
"static, no chunk"),
2939 kind = kmp_sched_default;
2943 thread = __kmp_threads[gtid];
// Save the current ICVs before overwriting the schedule.
2945 __kmp_save_internal_controls(thread);
// Standard kinds.
2947 if (kind < kmp_sched_upper_std) {
// Static with a chunk below KMP_DEFAULT_CHUNK maps to plain kmp_sch_static.
2948 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2951 thread->th.th_current_task->td_icvs.sched.r_sched_type =
kmp_sch_static;
// Other standard kinds are looked up in __kmp_sch_map.
2953 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2954 __kmp_sch_map[kind - kmp_sched_lower - 1];
// Extension kinds: their map entries follow the standard entries, hence the
// index offset.
2959 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2960 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2961 kmp_sched_lower - 2];
// Apply the modifiers carried by orig_kind to the stored internal type.
2963 __kmp_sched_apply_mods_intkind(
2964 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
// Auto schedule or a chunk below 1 stores KMP_DEFAULT_CHUNK; otherwise the
// caller's chunk is stored.
2965 if (kind == kmp_sched_auto || chunk < 1) {
2967 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2969 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
// Report the run-time schedule ICV of thread `gtid`.
//   gtid  - global thread id indexing __kmp_threads.
//   kind  - out: schedule kind derived from the stored internal schedule type,
//           with modifiers applied via __kmp_sched_apply_mods_stdkind.
//   chunk - out: the stored chunk value (td_icvs.sched.chunk).
// An internal type with no mapping is a fatal error (UnknownSchedulingType).
2974void __kmp_get_schedule(
int gtid, kmp_sched_t *kind,
int *chunk) {
2978 KF_TRACE(10, (
"__kmp_get_schedule: thread %d\n", gtid));
2979 KMP_DEBUG_ASSERT(__kmp_init_serial);
2981 thread = __kmp_threads[gtid];
// Dispatch on the internal type with modifier bits stripped.
2983 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2984 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
// Static variants all report kmp_sched_static.
2986 case kmp_sch_static_greedy:
2987 case kmp_sch_static_balanced:
2988 *kind = kmp_sched_static;
2989 __kmp_sched_apply_mods_stdkind(kind, th_type);
2992 case kmp_sch_static_chunked:
2993 *kind = kmp_sched_static;
2995 case kmp_sch_dynamic_chunked:
2996 *kind = kmp_sched_dynamic;
// Both guided implementations report kmp_sched_guided.
2999 case kmp_sch_guided_iterative_chunked:
3000 case kmp_sch_guided_analytical_chunked:
3001 *kind = kmp_sched_guided;
3004 *kind = kmp_sched_auto;
3006 case kmp_sch_trapezoidal:
3007 *kind = kmp_sched_trapezoidal;
3009#if KMP_STATIC_STEAL_ENABLED
3010 case kmp_sch_static_steal:
3011 *kind = kmp_sched_static_steal;
3015 KMP_FATAL(UnknownSchedulingType, th_type);
// Re-attach the modifiers of the stored type to the reported kind.
3018 __kmp_sched_apply_mods_stdkind(kind, th_type);
3019 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
// Find the thread number of the ancestor of thread `gtid` at nesting level
// `level` by walking up the team parent chain.
//   gtid  - global thread id indexing __kmp_threads.
//   level - requested nesting level.
// Returns 0 when the walk ends inside a serialized nest (dd > 1), otherwise
// the t_master_tid of the team reached.
3022int __kmp_get_ancestor_thread_num(
int gtid,
int level) {
3028 KF_TRACE(10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
3029 KMP_DEBUG_ASSERT(__kmp_init_serial);
// Start from the thread's current team; ii tracks the level being visited.
3036 thr = __kmp_threads[gtid];
3037 team = thr->th.th_team;
3038 ii = team->t.t_level;
// Threads inside a teams construct carry the level at which it started.
3042 if (thr->th.th_teams_microtask) {
3044 int tlevel = thr->th.th_teams_level;
3047 KMP_DEBUG_ASSERT(ii >= tlevel);
// NOTE(review): presumably the "requested level is the current level" case;
// confirm the guarding condition.
3059 return __kmp_tid_from_gtid(gtid);
// dd counts the serialized levels remaining within the current team.
3061 dd = team->t.t_serialized;
// Climb until the requested level is reached, consuming serialized levels of
// a team before moving to its parent.
3063 while (ii > level) {
3064 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3066 if ((team->t.t_serialized) && (!dd)) {
3067 team = team->t.t_parent;
3071 team = team->t.t_parent;
3072 dd = team->t.t_serialized;
3077 return (dd > 1) ? (0) : (team->t.t_master_tid);
// Find the size of the team that thread `gtid` belongs to at nesting level
// `level` by walking up the team parent chain.
//   gtid  - global thread id indexing __kmp_threads.
//   level - requested nesting level.
// Returns t_nproc of the team reached.
3080int __kmp_get_team_size(
int gtid,
int level) {
3086 KF_TRACE(10, (
"__kmp_get_team_size: thread %d %d\n", gtid, level));
3087 KMP_DEBUG_ASSERT(__kmp_init_serial);
// Start from the thread's current team; ii tracks the level being visited.
3094 thr = __kmp_threads[gtid];
3095 team = thr->th.th_team;
3096 ii = team->t.t_level;
// Threads inside a teams construct carry the level at which it started.
3100 if (thr->th.th_teams_microtask) {
3102 int tlevel = thr->th.th_teams_level;
3105 KMP_DEBUG_ASSERT(ii >= tlevel);
// Climb until the requested level is reached, consuming serialized levels of
// a team before moving to its parent.
3116 while (ii > level) {
3117 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3119 if (team->t.t_serialized && (!dd)) {
3120 team = team->t.t_parent;
3124 team = team->t.t_parent;
3129 return team->t.t_nproc;
// Build a kmp_r_sched_t (schedule type + chunk) from the global settings
// __kmp_sched, __kmp_static, __kmp_guided and __kmp_chunk.
3132kmp_r_sched_t __kmp_get_schedule_global() {
3137 kmp_r_sched_t r_sched;
// Split the global schedule into its base kind and its modifier bits.
3143 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3144 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
// Pick the schedule type: the configured static flavour, the configured
// guided flavour, or the global schedule itself.
3147 r_sched.r_sched_type = __kmp_static;
3150 r_sched.r_sched_type = __kmp_guided;
3152 r_sched.r_sched_type = __kmp_sched;
// Carry the modifiers over to the chosen type.
3154 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
// A global chunk below KMP_DEFAULT_CHUNK is replaced by KMP_DEFAULT_CHUNK.
3156 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3158 r_sched.chunk = KMP_DEFAULT_CHUNK;
3160 r_sched.chunk = __kmp_chunk;
// Make sure `team` has room for `argc` argv entries, using the inline array
// for small counts and a page allocation otherwise.
//   argc    - number of argv entries needed.
//   team    - team whose t_argv / t_max_argc are set up.
//   realloc - non-zero when the team already has an argv array; in that case
//             work is done only if argc exceeds t_max_argc, and a previously
//             heap-allocated array is freed first.
3168static void __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc) {
3170 KMP_DEBUG_ASSERT(team);
// Allocate on first use, or when the existing capacity is too small.
3171 if (!realloc || argc > team->t.t_max_argc) {
3173 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3174 "current entries=%d\n",
3175 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
// Free the old array only if it is not the inline storage.
3177 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3178 __kmp_free((
void *)team->t.t_argv);
// Small argument lists use the inline array embedded in the team.
3180 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3182 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3183 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d "
3185 team->t.t_id, team->t.t_max_argc));
3186 team->t.t_argv = &team->t.t_inline_argv[0];
3187 if (__kmp_storage_map) {
3188 __kmp_print_storage_map_gtid(
3189 -1, &team->t.t_inline_argv[0],
3190 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3191 (
sizeof(
void *) * KMP_INLINE_ARGV_ENTRIES),
"team_%d.t_inline_argv",
// Larger lists: capacity is at least KMP_MIN_MALLOC_ARGV_ENTRIES when argc is
// at most half of that minimum.
3196 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3197 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3199 KA_TRACE(100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
3201 team->t.t_id, team->t.t_max_argc));
3203 (
void **)__kmp_page_allocate(
sizeof(
void *) * team->t.t_max_argc);
3204 if (__kmp_storage_map) {
3205 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3206 &team->t.t_argv[team->t.t_max_argc],
3207 sizeof(
void *) * team->t.t_max_argc,
3208 "team_%d.t_argv", team->t.t_id);
// Allocate the per-team arrays sized for `max_nth` threads: the thread
// pointer array, the dispatch buffers, the per-thread dispatch structures and
// the implicit-task data. Records max_nth in t_max_nproc.
//   team    - team receiving the arrays.
//   max_nth - capacity, in threads, of the arrays.
3214static void __kmp_allocate_team_arrays(kmp_team_t *team,
int max_nth) {
// A single-thread team gets 2 dispatch buffers; larger teams get
// __kmp_dispatch_num_buffers.
3216 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3218 (kmp_info_t **)__kmp_allocate(
sizeof(kmp_info_t *) * max_nth);
3219 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3220 sizeof(dispatch_shared_info_t) * num_disp_buff);
3221 team->t.t_dispatch =
3222 (kmp_disp_t *)__kmp_allocate(
sizeof(kmp_disp_t) * max_nth);
3223 team->t.t_implicit_task_taskdata =
3224 (kmp_taskdata_t *)__kmp_allocate(
sizeof(kmp_taskdata_t) * max_nth);
3225 team->t.t_max_nproc = max_nth;
// Each dispatch buffer starts with its own index in both index fields.
3228 for (i = 0; i < num_disp_buff; ++i) {
3229 team->t.t_disp_buffer[i].buffer_index = i;
3230 team->t.t_disp_buffer[i].doacross_buf_idx = i;
// Release the per-team arrays of `team` and clear the pointers to them.
3234static void __kmp_free_team_arrays(kmp_team_t *team) {
// Free any per-thread dispatch buffer hanging off the dispatch structures
// before the dispatch array itself is released.
3237 for (i = 0; i < team->t.t_max_nproc; ++i) {
3238 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3239 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3240 team->t.t_dispatch[i].th_disp_buffer = NULL;
3243#if KMP_USE_HIER_SCHED
3244 __kmp_dispatch_free_hierarchies(team);
3246 __kmp_free(team->t.t_threads);
3247 __kmp_free(team->t.t_disp_buffer);
3248 __kmp_free(team->t.t_dispatch);
3249 __kmp_free(team->t.t_implicit_task_taskdata);
// Clear the freed pointers.
3250 team->t.t_threads = NULL;
3251 team->t.t_disp_buffer = NULL;
3252 team->t.t_dispatch = NULL;
3253 team->t.t_implicit_task_taskdata = 0;
// Re-create the per-team arrays of `team` with capacity `max_nth`, keeping
// the existing thread pointers.
//   team    - team whose arrays are replaced.
//   max_nth - new capacity, in threads.
3256static void __kmp_reallocate_team_arrays(kmp_team_t *team,
int max_nth) {
// Hold on to the old thread array; its contents are copied over below.
3257 kmp_info_t **oldThreads = team->t.t_threads;
// The other arrays are freed and allocated afresh rather than copied.
3259 __kmp_free(team->t.t_disp_buffer);
3260 __kmp_free(team->t.t_dispatch);
3261 __kmp_free(team->t.t_implicit_task_taskdata);
3262 __kmp_allocate_team_arrays(team, max_nth);
// Copy the first t_nproc thread pointers into the new array.
3264 KMP_MEMCPY(team->t.t_threads, oldThreads,
3265 team->t.t_nproc *
sizeof(kmp_info_t *));
3267 __kmp_free(oldThreads);
// Build a kmp_internal_control_t initialized from the global defaults
// (dynamic flag, blocktime settings, default team size, max active levels,
// first proc-bind entry, default device, ...).
3270static kmp_internal_control_t __kmp_get_global_icvs(
void) {
// Global run-time schedule.
3272 kmp_r_sched_t r_sched =
3273 __kmp_get_schedule_global();
// bind_types[0] is read below, so the list must not be empty.
3275 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
// Positional initializer: the order must match the field order of
// kmp_internal_control_t.
3277 kmp_internal_control_t g_icvs = {
3279 (kmp_int8)__kmp_global.g.g_dynamic,
3281 (kmp_int8)__kmp_env_blocktime,
3283 __kmp_dflt_blocktime,
3288 __kmp_dflt_team_nth,
3292 __kmp_dflt_max_active_levels,
3296 __kmp_nested_proc_bind.bind_types[0],
3297 __kmp_default_device,
// Build a kmp_internal_control_t from the ICVs of the current task of
// thread 0 of `team`; the record's `next` link is cleared.
3304static kmp_internal_control_t __kmp_get_x_global_icvs(
const kmp_team_t *team) {
3306 kmp_internal_control_t gx_icvs;
3307 gx_icvs.serial_nesting_level =
// Copy the ICVs from the team's thread 0.
3309 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3310 gx_icvs.next = NULL;
// Initialize a root structure: reset its state fields, then allocate and set
// up its root team and its hot team.
//   root - root to initialize; r_begin must be clear on entry.
3315static void __kmp_initialize_root(kmp_root_t *root) {
3317 kmp_team_t *root_team;
3318 kmp_team_t *hot_team;
3319 int hot_team_max_nth;
// Global schedule and ICV defaults used for both teams.
3320 kmp_r_sched_t r_sched =
3321 __kmp_get_schedule_global();
3322 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3323 KMP_DEBUG_ASSERT(root);
3324 KMP_ASSERT(!root->r.r_begin);
// Reset the root's own state.
3327 __kmp_init_lock(&root->r.r_begin_lock);
3328 root->r.r_begin = FALSE;
3329 root->r.r_active = FALSE;
3330 root->r.r_in_parallel = 0;
3331 root->r.r_blocktime = __kmp_dflt_blocktime;
3332#if KMP_AFFINITY_SUPPORTED
3333 root->r.r_affinity_assigned = FALSE;
// Root team.
3338 KF_TRACE(10, (
"__kmp_initialize_root: before root_team\n"));
3341 __kmp_allocate_team(root,
3347 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3349 USE_NESTED_HOT_ARG(NULL)
// Store an all-ones value as the root team's microtask pointer.
3354 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3357 KF_TRACE(10, (
"__kmp_initialize_root: after root_team = %p\n", root_team));
3359 root->r.r_root_team = root_team;
3360 root_team->t.t_control_stack_top = NULL;
// The root team is a serialized team of one thread; its thread slot is filled
// in later.
3363 root_team->t.t_threads[0] = NULL;
3364 root_team->t.t_nproc = 1;
3365 root_team->t.t_serialized = 1;
3367 root_team->t.t_sched.sched = r_sched.sched;
3370 (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3371 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
// Hot team.
3375 KF_TRACE(10, (
"__kmp_initialize_root: before hot_team\n"));
3378 __kmp_allocate_team(root,
3380 __kmp_dflt_team_nth_ub * 2,
3384 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3386 USE_NESTED_HOT_ARG(NULL)
3388 KF_TRACE(10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team));
3390 root->r.r_hot_team = hot_team;
// NOTE(review): this clears root_team's control stack a second time, right
// after the hot team is created; confirm whether hot_team was intended.
3391 root_team->t.t_control_stack_top = NULL;
3394 hot_team->t.t_parent = root_team;
// Clear every thread slot of the hot team, up to its capacity.
3397 hot_team_max_nth = hot_team->t.t_max_nproc;
3398 for (f = 0; f < hot_team_max_nth; ++f) {
3399 hot_team->t.t_threads[f] = NULL;
3401 hot_team->t.t_nproc = 1;
3403 hot_team->t.t_sched.sched = r_sched.sched;
3404 hot_team->t.t_size_changed = 0;
// Node of a singly linked list of teams, used by the structure-printing
// helpers below. `entry` is the team, `next` the following node.
3409typedef struct kmp_team_list_item {
3410 kmp_team_p
const *entry;
3411 struct kmp_team_list_item *next;
3412} kmp_team_list_item_t;
// A list is referred to by a pointer to its first node.
3413typedef kmp_team_list_item_t *kmp_team_list_t;
// Add `team`, together with its parent team and its next-in-pool team
// (recursively), to `list`.
//   list - accumulated team list; must not be NULL.
//   team - team to add.
3415static void __kmp_print_structure_team_accum(
3416 kmp_team_list_t list,
3417 kmp_team_p
const *team
3427 KMP_DEBUG_ASSERT(list != NULL);
// Recurse into the teams reachable from this one.
3432 __kmp_print_structure_team_accum(list, team->t.t_parent);
3433 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
// Look for the team among the existing entries.
3437 while (l->next != NULL && l->entry != team) {
3440 if (l->next != NULL) {
// Find the insertion point by comparing team ids.
3446 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
// Allocate the node for the new entry.
3452 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3453 sizeof(kmp_team_list_item_t));
// Print `title` followed by either the team's id (hex) and address, or
// " - (nil)".
3460static void __kmp_print_structure_team(
char const *title, kmp_team_p
const *team
3463 __kmp_printf(
"%s", title);
3465 __kmp_printf(
"%2x %p\n", team->t.t_id, team);
3467 __kmp_printf(
" - (nil)\n");
// Print `title` followed by either the thread's gtid and address, or
// " - (nil)" when `thread` is NULL.
3471static void __kmp_print_structure_thread(
char const *title,
3472 kmp_info_p
const *thread) {
3473 __kmp_printf(
"%s", title);
3474 if (thread != NULL) {
3475 __kmp_printf(
"%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3477 __kmp_printf(
" - (nil)\n");
// Debug dump of the runtime's data structures via __kmp_printf: the global
// thread table, each thread, each root ("uber"), every team reachable from
// them, and the thread/team pools.
3481void __kmp_print_structure(
void) {
// Teams encountered while dumping threads and roots are accumulated here and
// printed in the "Teams" section.
3483 kmp_team_list_t list;
3487 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
// Section: global thread table (one row per gtid slot).
3491 __kmp_printf(
"\n------------------------------\nGlobal Thread "
3492 "Table\n------------------------------\n");
3495 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3496 __kmp_printf(
"%2d", gtid);
3497 if (__kmp_threads != NULL) {
3498 __kmp_printf(
" %p", __kmp_threads[gtid]);
3500 if (__kmp_root != NULL) {
3501 __kmp_printf(
" %p", __kmp_root[gtid]);
// Section: per-thread details.
3508 __kmp_printf(
"\n------------------------------\nThreads\n--------------------"
3510 if (__kmp_threads != NULL) {
3512 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3513 kmp_info_t
const *thread = __kmp_threads[gtid];
3514 if (thread != NULL) {
3515 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3516 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3517 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3518 __kmp_print_structure_team(
" Serial Team: ",
3519 thread->th.th_serial_team);
3520 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3521 __kmp_print_structure_thread(
" Primary: ",
3522 thread->th.th_team_master);
3523 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3524 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3525 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3526 __kmp_print_structure_thread(
" Next in pool: ",
3527 thread->th.th_next_pool);
// Remember this thread's teams for the "Teams" section.
3529 __kmp_print_structure_team_accum(list, thread->th.th_team);
3530 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3534 __kmp_printf(
"Threads array is not allocated.\n");
// Section: per-root details.
3538 __kmp_printf(
"\n------------------------------\nUbers\n----------------------"
3540 if (__kmp_root != NULL) {
3542 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3543 kmp_root_t
const *root = __kmp_root[gtid];
3545 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3546 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3547 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3548 __kmp_print_structure_thread(
" Uber Thread: ",
3549 root->r.r_uber_thread);
3550 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3551 __kmp_printf(
" In Parallel: %2d\n",
3552 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
// Remember this root's teams for the "Teams" section.
3554 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3555 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3559 __kmp_printf(
"Ubers array is not allocated.\n");
// Section: every accumulated team. The loop stops at the node whose `next`
// is NULL, so that last node is not printed.
3562 __kmp_printf(
"\n------------------------------\nTeams\n----------------------"
3564 while (list->next != NULL) {
3565 kmp_team_p
const *team = list->entry;
3567 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3568 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3569 __kmp_printf(
" Primary TID: %2d\n", team->t.t_master_tid);
3570 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3571 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3572 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3573 for (i = 0; i < team->t.t_nproc; ++i) {
3574 __kmp_printf(
" Thread %2d: ", i);
3575 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3577 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
// Section: heads of the thread pool and the team pool.
3583 __kmp_printf(
"\n------------------------------\nPools\n----------------------"
3585 __kmp_print_structure_thread(
"Thread pool: ",
3586 CCAST(kmp_info_t *, __kmp_thread_pool));
3587 __kmp_print_structure_team(
"Team pool: ",
3588 CCAST(kmp_team_t *, __kmp_team_pool));
// Release the accumulated list nodes.
3592 while (list != NULL) {
3593 kmp_team_list_item_t *item = list;
3595 KMP_INTERNAL_FREE(item);
// Table of 64 unsigned constants. __kmp_init_random selects one entry (by
// thread id modulo the table size) as a thread's multiplier th_a.
3604static const unsigned __kmp_primes[] = {
3605 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3606 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3607 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3608 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3609 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3610 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3611 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3612 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3613 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3614 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3615 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
// Produce the next pseudo-random value for `thread` from its per-thread
// generator state (th_x, th_a).
//   thread - thread whose generator state is read and advanced.
3619unsigned short __kmp_get_random(kmp_info_t *thread) {
3620 unsigned x = thread->th.th_x;
// The value is taken from the upper 16 bits of the current state.
3621 unsigned short r = (
unsigned short)(x >> 16);
// Advance the state: x' = x * a + 1 (unsigned arithmetic).
3623 thread->th.th_x = x * thread->th.th_a + 1;
3625 KA_TRACE(30, (
"__kmp_get_random: THREAD: %d, RETURN: %u\n",
3626 thread->th.th_info.ds.ds_tid, r));
// Initialize the per-thread pseudo-random generator state of `thread`.
3632void __kmp_init_random(kmp_info_t *thread) {
// The thread's tid within its team serves as the seed.
3633 unsigned seed = thread->th.th_info.ds.ds_tid;
// The multiplier is the __kmp_primes entry selected by seed modulo the number
// of table entries.
3636 __kmp_primes[seed % (
sizeof(__kmp_primes) /
sizeof(__kmp_primes[0]))];
// Initial state derived from the seed and the multiplier.
3637 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3639 (
"__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
// Scan all gtid slots and unregister root (uber) threads that are no longer
// running; the results of __kmp_unregister_root_other_thread are summed in r.
3645static int __kmp_reclaim_dead_roots(
void) {
3648 for (i = 0; i < __kmp_threads_capacity; ++i) {
// Candidate: an uber gtid whose thread is not still running.
3649 if (KMP_UBER_GTID(i) &&
3650 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3653 r += __kmp_unregister_root_other_thread(i);
// Grow the __kmp_threads / __kmp_root arrays so that at least `nNeed` more
// slots are available, bounded by __kmp_sys_max_nth.
//   nNeed - number of additional slots required.
// `added` accumulates the number of slots gained.
3678static int __kmp_expand_threads(
int nNeed) {
3680 int minimumRequiredCapacity;
3682 kmp_info_t **newThreads;
3683 kmp_root_t **newRoot;
// On Windows static-library builds, first try to reclaim slots of dead roots.
3689#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3692 added = __kmp_reclaim_dead_roots();
3721 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
// Check whether the system limit leaves room for nNeed more slots.
3724 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3728 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
// Double the capacity (capped at __kmp_sys_max_nth) until it is sufficient.
3730 newCapacity = __kmp_threads_capacity;
3732 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3733 : __kmp_sys_max_nth;
3734 }
while (newCapacity < minimumRequiredCapacity);
// One allocation holds both arrays: thread pointers first, root pointers
// immediately after them.
3735 newThreads = (kmp_info_t **)__kmp_allocate(
3736 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3738 (kmp_root_t **)((
char *)newThreads +
sizeof(kmp_info_t *) * newCapacity);
// Copy the existing entries into the new arrays.
3739 KMP_MEMCPY(newThreads, __kmp_threads,
3740 __kmp_threads_capacity *
sizeof(kmp_info_t *));
3741 KMP_MEMCPY(newRoot, __kmp_root,
3742 __kmp_threads_capacity *
sizeof(kmp_root_t *));
// The old array is not freed here; it is pushed onto __kmp_old_threads_list.
3745 kmp_old_threads_list_t *node =
3746 (kmp_old_threads_list_t *)__kmp_allocate(
sizeof(kmp_old_threads_list_t));
3747 node->threads = __kmp_threads;
3748 node->next = __kmp_old_threads_list;
3749 __kmp_old_threads_list = node;
// Publish the new arrays and capacity through volatile-qualified stores.
3751 *(kmp_info_t * *
volatile *)&__kmp_threads = newThreads;
3752 *(kmp_root_t * *
volatile *)&__kmp_root = newRoot;
3753 added += newCapacity - __kmp_threads_capacity;
3754 *(
volatile int *)&__kmp_threads_capacity = newCapacity;
// Keep the threadprivate cache capacity in step; the condition is re-checked
// under __kmp_tp_cached_lock.
3756 if (newCapacity > __kmp_tp_capacity) {
3757 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3758 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3759 __kmp_threadprivate_resize_cache(newCapacity);
3761 *(
volatile int *)&__kmp_tp_capacity = newCapacity;
3763 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
// Register the calling thread as a root (uber) thread: find a free gtid slot,
// create or reuse the root and thread structures for it, initialize them, and
// issue the OMPT/OMPD thread-begin notifications.
//   initial_thread - non-zero when the caller is the initial thread.
// The whole registration runs under __kmp_forkjoin_lock.
3772int __kmp_register_root(
int initial_thread) {
3773 kmp_info_t *root_thread;
3777 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3778 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
// Work out how many slots are usable for this registration.
3795 capacity = __kmp_threads_capacity;
3796 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
// Slots for hidden helper threads that are not initialized yet are excluded.
3803 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3804 capacity -= __kmp_hidden_helper_threads_num;
// Out of slots and unable to expand: fatal error.
3808 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3809 if (__kmp_tp_cached) {
3810 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3811 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3812 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3814 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
// While hidden helper threads are being initialized, search for a free slot
// in the range 1..__kmp_hidden_helper_threads_num.
3824 if (TCR_4(__kmp_init_hidden_helper_threads)) {
// NOTE(review): the slot is read before gtid is checked against the bound.
3827 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3828 gtid <= __kmp_hidden_helper_threads_num;
3831 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3832 KA_TRACE(1, (
"__kmp_register_root: found slot in threads array for "
3833 "hidden helper thread: T#%d\n",
// Regular roots: the initial thread may use slot 0; otherwise search above the
// hidden-helper range.
3839 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3842 for (gtid = __kmp_hidden_helper_threads_num + 1;
3843 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3847 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3848 KMP_ASSERT(gtid < __kmp_threads_capacity);
// Account for the new thread.
3853 TCW_4(__kmp_nth, __kmp_nth + 1);
// Switch the gtid lookup mode depending on the total thread count.
3857 if (__kmp_adjust_gtid_mode) {
3858 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3859 if (TCR_4(__kmp_gtid_mode) != 2) {
3860 TCW_4(__kmp_gtid_mode, 2);
3863 if (TCR_4(__kmp_gtid_mode) != 1) {
3864 TCW_4(__kmp_gtid_mode, 1);
3869#ifdef KMP_ADJUST_BLOCKTIME
// With no user-set blocktime, oversubscription (more threads than available
// processors) turns on zero blocktime.
3872 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3873 if (__kmp_nth > __kmp_avail_proc) {
3874 __kmp_zero_bt = TRUE;
// Create the root structure for this slot if none exists yet.
3880 if (!(root = __kmp_root[gtid])) {
3881 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3882 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3885#if KMP_STATS_ENABLED
3887 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3888 __kmp_stats_thread_ptr->startLife();
3889 KMP_SET_THREAD_STATE(SERIAL_REGION);
3892 __kmp_initialize_root(root);
// Reuse the root's existing uber thread structure, or allocate a new one.
3895 if (root->r.r_uber_thread) {
3896 root_thread = root->r.r_uber_thread;
3898 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3899 if (__kmp_storage_map) {
3900 __kmp_print_thread_storage_map(root_thread, gtid);
3902 root_thread->th.th_info.ds.ds_gtid = gtid;
3904 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3906 root_thread->th.th_root = root;
3907 if (__kmp_env_consistency_check) {
3908 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3911 __kmp_initialize_fast_memory(root_thread);
3915 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3916 __kmp_initialize_bget(root_thread);
3918 __kmp_init_random(root_thread);
// Give the thread a serial team if it does not have one yet.
3922 if (!root_thread->th.th_serial_team) {
3923 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3924 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3925 root_thread->th.th_serial_team = __kmp_allocate_team(
3930 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3932 KMP_ASSERT(root_thread->th.th_serial_team);
3933 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3934 root_thread->th.th_serial_team));
// Publish the thread in the global table and make it thread 0 of the root
// team, the hot team and its serial team.
3937 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3939 root->r.r_root_team->t.t_threads[0] = root_thread;
3940 root->r.r_hot_team->t.t_threads[0] = root_thread;
3941 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3943 root_thread->th.th_serial_team->t.t_serialized = 0;
3944 root->r.r_uber_thread = root_thread;
3947 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3948 TCW_4(__kmp_init_gtid, TRUE);
// Bind the gtid to the calling OS thread.
3951 __kmp_gtid_set_specific(gtid);
3954 __kmp_itt_thread_name(gtid);
3957#ifdef KMP_TDATA_GTID
3960 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3961 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3963 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "
3965 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3966 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3967 KMP_INIT_BARRIER_STATE));
// Reset the thread's barrier state for every barrier type.
3970 for (b = 0; b < bs_last_barrier; ++b) {
3971 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3973 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3977 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3978 KMP_INIT_BARRIER_STATE);
3980#if KMP_AFFINITY_SUPPORTED
// Affinity places start out undefined.
3981 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3982 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3983 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3984 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3986 root_thread->th.th_def_allocator = __kmp_def_allocator;
3987 root_thread->th.th_prev_level = 0;
3988 root_thread->th.th_prev_num_threads = 1;
// Create the contention-group root node for this thread, counting itself.
3990 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
3991 tmp->cg_root = root_thread;
3992 tmp->cg_thread_limit = __kmp_cg_max_nth;
3993 tmp->cg_nthreads = 1;
3994 KA_TRACE(100, (
"__kmp_register_root: Thread %p created node %p with"
3995 " cg_nthreads init to 1\n",
3998 root_thread->th.th_cg_roots = tmp;
4000 __kmp_root_counter++;
// OMPT notifications for non-initial roots: thread-begin, then the begin of
// the initial implicit task.
4003 if (!initial_thread && ompt_enabled.enabled) {
// Note: this declaration shadows the outer root_thread within this scope.
4005 kmp_info_t *root_thread = ompt_get_thread();
4007 ompt_set_thread_state(root_thread, ompt_state_overhead);
4009 if (ompt_enabled.ompt_callback_thread_begin) {
4010 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
4011 ompt_thread_initial, __ompt_get_thread_data_internal());
4013 ompt_data_t *task_data;
4014 ompt_data_t *parallel_data;
// Fetch the task and parallel data handles passed to the callback below.
4015 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
4017 if (ompt_enabled.ompt_callback_implicit_task) {
4018 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4019 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
4022 ompt_set_thread_state(root_thread, ompt_state_work_serial);
// OMPD breakpoint hook for thread begin.
4026 if (ompd_state & OMPD_ENABLE_BP)
4027 ompd_bp_thread_begin();
4031 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4036#if KMP_NESTED_HOT_TEAMS
// Recursively free the nested hot teams owned by thread `thr`, starting at
// nesting level `level`.
//   root      - root passed through to __kmp_free_team.
//   thr       - thread whose th_hot_teams array is walked.
//   level     - hot-team nesting level to free; must be < max_level.
//   max_level - number of hot-team nesting levels.
// `n` accumulates the results of the recursive calls.
4037static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr,
int level,
4038 const int max_level) {
4040 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
// Nothing recorded for this thread at this level.
4041 if (!hot_teams || !hot_teams[level].hot_team) {
4044 KMP_DEBUG_ASSERT(level < max_level);
4045 kmp_team_t *team = hot_teams[level].hot_team;
4046 nth = hot_teams[level].hot_team_nth;
// Descend into the deeper levels owned by each thread of this team.
4048 if (level < max_level - 1) {
4049 for (i = 0; i < nth; ++i) {
4050 kmp_info_t *th = team->t.t_threads[i];
4051 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
// Threads other than index 0 also release their own hot-teams array.
4052 if (i > 0 && th->th.th_hot_teams) {
4053 __kmp_free(th->th.th_hot_teams);
4054 th->th.th_hot_teams = NULL;
4058 __kmp_free_team(root, team, NULL);
// Tears down the resources owned by `root`: detaches and frees the root team
// and the hot team (including nested hot teams when KMP_NESTED_HOT_TEAMS is
// set), waits for task teams to be unreferenced when tasking is enabled,
// reports thread end to OMPD/OMPT, drops the uber thread's contention-group
// root and reaps the uber thread.
//   gtid - global thread id of the root being reset (not referenced by any
//          statement visible in this extract).
//   root - root structure to reset; asserted to be inactive (!r_active).
// `n` starts as the hot team's thread count and is increased by the values
// returned from __kmp_free_hot_teams.
// NOTE(review): the declaration of `i`, several guards (#if / if), closing
// braces and the return statement are not visible in this extract.
4065static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
4066 kmp_team_t *root_team = root->r.r_root_team;
4067 kmp_team_t *hot_team = root->r.r_hot_team;
4068 int n = hot_team->t.t_nproc;
4071 KMP_DEBUG_ASSERT(!root->r.r_active);
// Detach both teams from the root before freeing them.
4073 root->r.r_root_team = NULL;
4074 root->r.r_hot_team = NULL;
4077 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4078#if KMP_NESTED_HOT_TEAMS
4079 if (__kmp_hot_teams_max_level >
// Release nested hot teams (levels >= 1) and the per-thread hot-team arrays of
// every member of the outermost hot team.
4081 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4082 kmp_info_t *th = hot_team->t.t_threads[i];
4083 if (__kmp_hot_teams_max_level > 1) {
4084 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4086 if (th->th.th_hot_teams) {
4087 __kmp_free(th->th.th_hot_teams);
4088 th->th.th_hot_teams = NULL;
4093 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
// With tasking enabled, wait until task teams are no longer referenced.
4098 if (__kmp_tasking_mode != tskm_immediate_exec) {
4099 __kmp_wait_to_unref_task_teams();
// NOTE(review): the LPVOID cast suggests the handle release below is
// Windows-only; the guarding #if is not visible here - confirm.
4105 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
4107 (LPVOID) & (root->r.r_uber_thread->th),
4108 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4109 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
// OMPD: thread-end breakpoint when breakpoints are enabled.
4113 if (ompd_state & OMPD_ENABLE_BP)
4114 ompd_bp_thread_end();
// OMPT: fetch task/parallel data, then report the end of the initial implicit
// task and the end of the thread.
4118 ompt_data_t *task_data;
4119 ompt_data_t *parallel_data;
4120 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data,
4122 if (ompt_enabled.ompt_callback_implicit_task) {
4123 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4124 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4126 if (ompt_enabled.ompt_callback_thread_end) {
4127 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4128 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
// Post-decrement: `i` receives the contention-group thread count as it was
// BEFORE the decrement.
4134 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4135 KA_TRACE(100, (
"__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"
4137 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4138 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
// The uber thread must be the root of its own contention group, with no
// parent node, before that node is freed.
// NOTE(review): the condition guarding this free (presumably on `i`) is not
// visible in this extract.
4141 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4142 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4143 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4144 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4145 root->r.r_uber_thread->th.th_cg_roots = NULL;
// Reap the uber thread (second argument 1 == is_root) and clear the root.
4147 __kmp_reap_thread(root->r.r_uber_thread, 1);
4151 root->r.r_uber_thread = NULL;
4153 root->r.r_begin = FALSE;
// Unregisters the root identified by `gtid`.
//   gtid - global thread id of the root; must satisfy KMP_UBER_GTID and index
//          an inactive root in __kmp_root.
// __kmp_forkjoin_lock is acquired up front and released both on the
// "already finished" path and at the end.
// NOTE(review): the return on the early-exit path, closing braces and some
// #if guards are not visible in this extract.
4158void __kmp_unregister_root_current_thread(
int gtid) {
4159 KA_TRACE(1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4163 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
// Library already shut down (g_done) or never serially initialized: only
// trace and release the lock.
4164 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4165 KC_TRACE(10, (
"__kmp_unregister_root_current_thread: already finished, "
4168 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4171 kmp_root_t *root = __kmp_root[gtid];
4173 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4174 KMP_ASSERT(KMP_UBER_GTID(gtid));
4175 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4176 KMP_ASSERT(root->r.r_active == FALSE);
4180 kmp_info_t *thread = __kmp_threads[gtid];
4181 kmp_team_t *team = thread->th.th_team;
4182 kmp_task_team_t *task_team = thread->th.th_task_team;
// A task team that saw proxy tasks or hidden-helper tasks gets a
// __kmp_task_team_wait before the root is torn down.
// NOTE(review): the wait is presumably inside this conditional - the closing
// brace is not visible.
4185 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4186 task_team->tt.tt_hidden_helper_task_encountered)) {
4189 thread->th.ompt_thread_info.state = ompt_state_undefined;
4191 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4194 __kmp_reset_root(gtid, root);
4198 (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4200 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
// Unregisters the root identified by `gtid` by calling __kmp_reset_root.
// Unlike __kmp_unregister_root_current_thread, no statement visible here
// takes __kmp_forkjoin_lock or waits on a task team.
//   gtid - global thread id of the root; must satisfy KMP_UBER_GTID and index
//          an inactive root in __kmp_root.
// `r` receives the result of __kmp_reset_root.
// NOTE(review): the declaration of `r` and the return statement are not
// visible in this extract; presumably `r` is returned - confirm.
4207static int __kmp_unregister_root_other_thread(
int gtid) {
4208 kmp_root_t *root = __kmp_root[gtid];
4211 KA_TRACE(1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4212 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4213 KMP_ASSERT(KMP_UBER_GTID(gtid));
4214 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4215 KMP_ASSERT(root->r.r_active == FALSE);
4217 r = __kmp_reset_root(gtid, root);
4219 (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
// Debug helper: gathers the calling thread's gtid, tid, thread descriptor,
// current team and serial team, and formats them together with the thread's
// current task and the parent of its implicit task in the current team.
// NOTE(review): the call that consumes the format string (and the tail of the
// format string itself) is not visible in this extract.
4225void __kmp_task_info() {
4227 kmp_int32 gtid = __kmp_entry_gtid();
4228 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4229 kmp_info_t *this_thr = __kmp_threads[gtid];
4230 kmp_team_t *steam = this_thr->th.th_serial_team;
4231 kmp_team_t *team = this_thr->th.th_team;
4234 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4236 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4237 team->t.t_implicit_task_taskdata[tid].td_parent);
// Binds thread `this_thr` to slot `tid` of `team` and (re)initializes its
// per-team state: team/root/primary-thread pointers, implicit task, dispatch
// structure and buffers, contention-group membership and the task-state memo
// stack.
//   this_thr - thread being initialized; must already own a serial team.
//   team     - team the thread joins; t_threads[0] is taken as the primary
//              thread and must already have a root.
//   tid      - index of the thread inside `team`.
//   gtid     - global thread id, used for tracing and storage-map output.
// NOTE(review): a number of lines (else keywords, closing braces, #endif,
// the declaration of `disp_size` and of the loop index `i`, arguments of
// __kmp_init_implicit_task) are not visible in this extract.
4244static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4245 int tid,
int gtid) {
4249 KMP_DEBUG_ASSERT(this_thr != NULL);
4250 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4251 KMP_DEBUG_ASSERT(team);
4252 KMP_DEBUG_ASSERT(team->t.t_threads);
4253 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4254 kmp_info_t *master = team->t.t_threads[0];
4255 KMP_DEBUG_ASSERT(master);
4256 KMP_DEBUG_ASSERT(master->th.th_root);
// Publish the team pointer, then fill in the plain per-thread fields.
4260 TCW_SYNC_PTR(this_thr->th.th_team, team);
4262 this_thr->th.th_info.ds.ds_tid = tid;
4263 this_thr->th.th_set_nproc = 0;
// Reap state depends on whether tasking is enabled.
// NOTE(review): the `else` between the two assignments below is not visible.
4264 if (__kmp_tasking_mode != tskm_immediate_exec)
4267 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4269 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4270 this_thr->th.th_set_proc_bind = proc_bind_default;
4271#if KMP_AFFINITY_SUPPORTED
4272 this_thr->th.th_new_place = this_thr->th.th_current_place;
4274 this_thr->th.th_root = master->th.th_root;
// Cache team properties in the thread descriptor.
4277 this_thr->th.th_team_nproc = team->t.t_nproc;
4278 this_thr->th.th_team_master = master;
4279 this_thr->th.th_team_serialized = team->t.t_serialized;
4281 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4283 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4284 tid, gtid, this_thr, this_thr->th.th_current_task));
4286 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4289 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4290 tid, gtid, this_thr, this_thr->th.th_current_task));
// The thread's dispatch structure is the team's slot for this tid.
4295 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4297 this_thr->th.th_local.this_construct = 0;
// Allocate the th_pri_common table once per thread.
4299 if (!this_thr->th.th_pri_common) {
4300 this_thr->th.th_pri_common =
4301 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4302 if (__kmp_storage_map) {
4303 __kmp_print_storage_map_gtid(
4304 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4305 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4307 this_thr->th.th_pri_head = NULL;
// A non-primary thread whose contention-group root differs from the primary
// thread's leaves its old group (count decremented) and joins the primary's
// group (count incremented), inheriting that group's thread limit.
// NOTE(review): the null check on `tmp` and any freeing of the old node are
// not visible in this extract.
4310 if (this_thr != master &&
4311 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4313 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4314 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
// Post-decrement: `i` holds the count before this thread left the group.
4317 int i = tmp->cg_nthreads--;
4318 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p decrement cg_nthreads"
4319 " on node %p of thread %p to %d\n",
4320 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4325 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4327 this_thr->th.th_cg_roots->cg_nthreads++;
4328 KA_TRACE(100, (
"__kmp_initialize_info: Thread %p increment cg_nthreads on"
4329 " node %p of thread %p to %d\n",
4330 this_thr, this_thr->th.th_cg_roots,
4331 this_thr->th.th_cg_roots->cg_root,
4332 this_thr->th.th_cg_roots->cg_nthreads));
4333 this_thr->th.th_current_task->td_icvs.thread_limit =
4334 this_thr->th.th_cg_roots->cg_thread_limit;
// Dispatch buffers: one dispatch_private_info_t when the team can only ever
// hold one thread, otherwise __kmp_dispatch_num_buffers of them.
4339 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4342 sizeof(dispatch_private_info_t) *
4343 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4344 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4345 team->t.t_max_nproc));
4346 KMP_ASSERT(dispatch);
4347 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4348 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4350 dispatch->th_disp_index = 0;
4351 dispatch->th_doacross_buf_idx = 0;
// First use allocates the buffer; the memset below zeroes disp_size bytes.
// NOTE(review): whether the memset sits in an `else` branch (reuse path only)
// cannot be told from the visible lines.
4352 if (!dispatch->th_disp_buffer) {
4353 dispatch->th_disp_buffer =
4354 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4356 if (__kmp_storage_map) {
4357 __kmp_print_storage_map_gtid(
4358 gtid, &dispatch->th_disp_buffer[0],
4359 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4361 : __kmp_dispatch_num_buffers],
4363 "th_%d.th_dispatch.th_disp_buffer "
4364 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4365 gtid, team->t.t_id, gtid);
4368 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4371 dispatch->th_dispatch_pr_current = 0;
4372 dispatch->th_dispatch_sh_current = 0;
4374 dispatch->th_deo_fcn = 0;
4375 dispatch->th_dxo_fcn = 0;
4378 this_thr->th.th_next_pool = NULL;
// Lazily create the task-state memo stack: 4 zeroed kmp_uint8 entries.
4380 if (!this_thr->th.th_task_state_memo_stack) {
4382 this_thr->th.th_task_state_memo_stack =
4383 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4384 this_thr->th.th_task_state_top = 0;
4385 this_thr->th.th_task_state_stack_sz = 4;
4386 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4388 this_thr->th.th_task_state_memo_stack[i] = 0;
// On exit the thread must not be spinning on, or queued for, a lock.
4391 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4392 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
// Provides a worker thread for `team`. Two paths are visible:
//   1. If __kmp_thread_pool is non-empty, its head is unlinked, re-initialized
//      for the team via __kmp_initialize_info and reused.
//   2. Otherwise a new kmp_info_t is allocated in a free __kmp_threads slot,
//      given its own serial team, initialized, and an OS worker is started
//      with __kmp_create_worker.
//   root - root the new thread / serial team is associated with.
//   team - team the thread is being allocated for.
// NOTE(review): the rest of the signature (the parameter used below as
// `new_tid`), the declarations of `b` and `new_gtid`, the return statements,
// closing braces and several #if/#endif lines are not visible in this extract.
4402kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4404 kmp_team_t *serial_team;
4405 kmp_info_t *new_thr;
4408 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4409 KMP_DEBUG_ASSERT(root && team);
4410#if !KMP_NESTED_HOT_TEAMS
4411 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
// ---- Path 1: reuse a thread from the pool ----
4416 if (__kmp_thread_pool) {
4417 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4418 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
// Keep the pool insertion hint from pointing at the thread just removed.
4419 if (new_thr == __kmp_thread_pool_insert_pt) {
4420 __kmp_thread_pool_insert_pt = NULL;
4422 TCW_4(new_thr->th.th_in_pool, FALSE);
// Under the thread's suspend mutex, drop it from the count of active pooled
// threads if it was counted there.
4423 __kmp_suspend_initialize_thread(new_thr);
4424 __kmp_lock_suspend_mx(new_thr);
4425 if (new_thr->th.th_active_in_pool == TRUE) {
4426 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4427 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4428 new_thr->th.th_active_in_pool = FALSE;
4430 __kmp_unlock_suspend_mx(new_thr);
4432 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4433 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4434 KMP_ASSERT(!new_thr->th.th_team);
4435 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
// Re-bind the pooled thread to the team, keeping its existing gtid.
4438 __kmp_initialize_info(new_thr, team, new_tid,
4439 new_thr->th.th_info.ds.ds_gtid);
4440 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4442 TCW_4(__kmp_nth, __kmp_nth + 1);
4444 new_thr->th.th_task_state = 0;
4445 new_thr->th.th_task_state_top = 0;
4446 new_thr->th.th_task_state_stack_sz = 4;
4448 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4450 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4454#ifdef KMP_ADJUST_BLOCKTIME
// Without a user-set blocktime, more threads than available procs switches
// the zero-blocktime flag on.
4457 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4458 if (__kmp_nth > __kmp_avail_proc) {
4459 __kmp_zero_bt = TRUE;
4468 kmp_balign_t *balign = new_thr->th.th_bar;
4469 for (b = 0; b < bs_last_barrier; ++b)
4470 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4473 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4474 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
// ---- Path 2: create a brand-new thread ----
// With an empty pool every existing thread is in use, and there must be room
// for one more.
4481 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4482 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
// Start the monitor thread once: check, take __kmp_monitor_lock, check again,
// create it, then spin until __kmp_init_monitor reaches 2.
// NOTE(review): the #if guarding the monitor code is not visible here.
4487 if (!TCR_4(__kmp_init_monitor)) {
4488 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4489 if (!TCR_4(__kmp_init_monitor)) {
4490 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4491 TCW_4(__kmp_init_monitor, 1);
4492 __kmp_create_monitor(&__kmp_monitor);
4493 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4504 while (TCR_4(__kmp_init_monitor) < 2) {
4507 KF_TRACE(10, (
"after monitor thread has started\n"));
4510 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
// Pick the first free gtid slot. The starting index depends on whether hidden
// helper threads are being initialized (first alternative not visible).
4517 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)
4519 : __kmp_hidden_helper_threads_num + 1;
4521 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4523 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4526 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4527 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
// Allocate the descriptor and publish it in the global thread table.
4532 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4534 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4536#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
// Mark fields that are accessed without locks so ITT-based tools suppress
// threading-error reports on them.
4539 __itt_suppress_mark_range(
4540 __itt_suppress_range, __itt_suppress_threading_errors,
4541 &new_thr->th.th_sleep_loc,
sizeof(new_thr->th.th_sleep_loc));
4542 __itt_suppress_mark_range(
4543 __itt_suppress_range, __itt_suppress_threading_errors,
4544 &new_thr->th.th_reap_state,
sizeof(new_thr->th.th_reap_state));
4546 __itt_suppress_mark_range(
4547 __itt_suppress_range, __itt_suppress_threading_errors,
4548 &new_thr->th.th_suspend_init,
sizeof(new_thr->th.th_suspend_init));
4550 __itt_suppress_mark_range(__itt_suppress_range,
4551 __itt_suppress_threading_errors,
4552 &new_thr->th.th_suspend_init_count,
4553 sizeof(new_thr->th.th_suspend_init_count));
4556 __itt_suppress_mark_range(__itt_suppress_range,
4557 __itt_suppress_threading_errors,
4558 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4559 sizeof(new_thr->th.th_bar[0].bb.b_go));
4560 __itt_suppress_mark_range(__itt_suppress_range,
4561 __itt_suppress_threading_errors,
4562 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4563 sizeof(new_thr->th.th_bar[1].bb.b_go));
4564 __itt_suppress_mark_range(__itt_suppress_range,
4565 __itt_suppress_threading_errors,
4566 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4567 sizeof(new_thr->th.th_bar[2].bb.b_go));
4569 if (__kmp_storage_map) {
4570 __kmp_print_thread_storage_map(new_thr, new_gtid);
// Give the new thread its own one-thread serial team, seeded with ICVs
// obtained from __kmp_get_x_global_icvs(team).
4575 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4576 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4577 new_thr->th.th_serial_team = serial_team =
4578 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4582 proc_bind_default, &r_icvs,
4583 0 USE_NESTED_HOT_ARG(NULL));
4585 KMP_ASSERT(serial_team);
4586 serial_team->t.t_serialized = 0;
4588 serial_team->t.t_threads[0] = new_thr;
4590 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
// Bind to the target team, then set up per-thread allocators and RNG.
4594 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4597 __kmp_initialize_fast_memory(new_thr);
4601 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4602 __kmp_initialize_bget(new_thr);
4605 __kmp_init_random(new_thr);
4609 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4610 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
// Reset the per-barrier state for every barrier type.
4613 kmp_balign_t *balign = new_thr->th.th_bar;
4614 for (b = 0; b < bs_last_barrier; ++b) {
4615 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4616 balign[b].bb.team = NULL;
4617 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4618 balign[b].bb.use_oncore_barrier = 0;
4621 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4622 new_thr->th.th_sleep_loc_type = flag_unset;
4624 new_thr->th.th_spin_here = FALSE;
4625 new_thr->th.th_next_waiting = 0;
4627 new_thr->th.th_blocking =
false;
4630#if KMP_AFFINITY_SUPPORTED
4631 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4632 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4633 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4634 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4636 new_thr->th.th_def_allocator = __kmp_def_allocator;
4637 new_thr->th.th_prev_level = 0;
4638 new_thr->th.th_prev_num_threads = 1;
// A freshly created thread is active and not in the pool.
4640 TCW_4(new_thr->th.th_in_pool, FALSE);
4641 new_thr->th.th_active_in_pool = FALSE;
4642 TCW_4(new_thr->th.th_active, TRUE);
// Switch the gtid lookup mode based on the total thread count relative to
// __kmp_tls_gtid_min (mode 2 at or above the threshold; the branch selecting
// mode 1 is presumably the `else` - not visible).
4650 if (__kmp_adjust_gtid_mode) {
4651 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4652 if (TCR_4(__kmp_gtid_mode) != 2) {
4653 TCW_4(__kmp_gtid_mode, 2);
4656 if (TCR_4(__kmp_gtid_mode) != 1) {
4657 TCW_4(__kmp_gtid_mode, 1);
4662#ifdef KMP_ADJUST_BLOCKTIME
4665 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4666 if (__kmp_nth > __kmp_avail_proc) {
4667 __kmp_zero_bt = TRUE;
// Finally start the OS-level worker for the new descriptor.
4674 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4675 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4677 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4679 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
// Refreshes the parts of `team` that change between uses: the source location
// ident, a newly generated team id, the implicit task of thread 0, and that
// task's ICVs (copied from `new_icvs`).
//   team     - team to refresh; must be non-null.
//   new_icvs - ICVs to install; must be non-null, and once parallel
//              initialization has happened its nproc must be non-zero.
// NOTE(review): the last parameter (used below as `loc`) and any statements
// on the lines missing from this extract are not visible.
4690static void __kmp_reinitialize_team(kmp_team_t *team,
4691 kmp_internal_control_t *new_icvs,
4693 KF_TRACE(10, (
"__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4694 team->t.t_threads[0], team));
4695 KMP_DEBUG_ASSERT(team && new_icvs);
4696 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4697 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4699 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
// Re-create implicit task 0, then overwrite its ICVs with the requested ones.
4701 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4702 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4704 KF_TRACE(10, (
"__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4705 team->t.t_threads[0], team));
// Puts `team` into a clean initial state for `new_nproc` threads and then
// delegates to __kmp_reinitialize_team for ident / id / implicit task / ICVs.
//   team      - team to initialize; its t_threads array must already exist.
//   new_nproc - number of threads; must not exceed team->t.t_max_nproc. The
//               team is marked serialized when new_nproc is not > 1.
//   new_icvs  - ICVs; its schedule is copied into the team here and the rest
//               is forwarded to __kmp_reinitialize_team.
// NOTE(review): the last parameter (used below as `loc`), #endif lines and
// any statements on lines missing from this extract are not visible.
4711static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
4712 kmp_internal_control_t *new_icvs,
4714 KF_TRACE(10, (
"__kmp_initialize_team: enter: team=%p\n", team));
4717 KMP_DEBUG_ASSERT(team);
4718 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4719 KMP_DEBUG_ASSERT(team->t.t_threads);
4722 team->t.t_master_tid = 0;
4724 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4725 team->t.t_nproc = new_nproc;
4728 team->t.t_next_pool = NULL;
// No microtask / invoker is attached yet.
4732 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4733 team->t.t_invoke = NULL;
4736 team->t.t_sched.sched = new_icvs->sched.sched;
4738#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// x86 only: clear the saved floating-point control state.
4739 team->t.t_fp_control_saved = FALSE;
4740 team->t.t_x87_fpu_control_word = 0;
4741 team->t.t_mxcsr = 0;
4744 team->t.t_construct = 0;
4746 team->t.t_ordered.dt.t_value = 0;
4747 team->t.t_master_active = FALSE;
4750 team->t.t_copypriv_data = NULL;
4753 team->t.t_copyin_counter = 0;
4756 team->t.t_control_stack_top = NULL;
4758 __kmp_reinitialize_team(team, new_icvs, loc);
4761 KF_TRACE(10, (
"__kmp_initialize_team: exit: team=%p\n", team));
4764#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
// When affinity is available (KMP_AFFINITY_CAPABLE), optionally saves the
// calling thread's current system affinity mask into `old_mask` (when it is
// non-NULL) and then sets the thread's affinity to __kmp_affin_fullMask.
// A __kmp_fatal(ChangeThreadAffMaskError) call is visible on the save path.
// NOTE(review): the return type line, the declarations of `status` / `error`,
// the condition guarding __kmp_fatal and the closing braces are not visible
// in this extract.
4767__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4768 if (KMP_AFFINITY_CAPABLE()) {
4770 if (old_mask != NULL) {
4771 status = __kmp_get_system_affinity(old_mask, TRUE);
4774 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
4778 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4783#if KMP_AFFINITY_SUPPORTED
// Distributes the primary thread's place partition among the threads of
// `team` according to team->t.t_proc_bind. For each thread it sets
// th_first_place / th_last_place (the thread's sub-partition) and
// th_new_place (the place it should move to), and raises
// team->t.t_display_affinity when __kmp_display_affinity is set and a thread's
// target place differs from its current one.
//   team               - team whose threads are (re)bound; t_threads[0] is the
//                        primary thread and supplies the partition
//                        [first_place, last_place] and the starting place.
//   update_master_only - when 1, the spread loops run over `thidx` threads
//                        instead of the whole team (the assignment of `thidx`
//                        is not visible; presumably 1 - confirm).
// Places wrap around: stepping past last_place continues at first_place, and
// (num_masks - 1) is treated as the end of the place list.
// NOTE(review): many lines (declarations of `f`, `n_places`, `thidx`,
// `gap_ct`, `spacing`, `first`, `last`, `th`; place++ / counter updates;
// else keywords; breaks; closing braces) are not visible in this extract, so
// comments below describe only what the visible statements establish.
4789static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
// Hidden-helper teams are special-cased (guarded statement not visible).
4791 if (KMP_HIDDEN_HELPER_TEAM(team))
4794 kmp_info_t *master_th = team->t.t_threads[0];
4795 KMP_DEBUG_ASSERT(master_th != NULL);
4796 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4797 int first_place = master_th->th.th_first_place;
4798 int last_place = master_th->th.th_last_place;
4799 int masters_place = master_th->th.th_current_place;
4800 int num_masks = __kmp_affinity.num_masks;
// The team inherits the primary thread's partition.
4801 team->t.t_first_place = first_place;
4802 team->t.t_last_place = last_place;
4804 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4805 "bound to place %d partition = [%d,%d]\n",
4806 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4807 team->t.t_id, masters_place, first_place, last_place));
4809 switch (proc_bind) {
// default: only expected for single-thread teams.
4811 case proc_bind_default:
4814 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
// primary: every worker (f >= 1) gets the primary's partition and is sent to
// the primary's place.
4817 case proc_bind_primary: {
4819 int n_th = team->t.t_nproc;
4820 for (f = 1; f < n_th; f++) {
4821 kmp_info_t *th = team->t.t_threads[f];
4822 KMP_DEBUG_ASSERT(th != NULL);
4823 th->th.th_first_place = first_place;
4824 th->th.th_last_place = last_place;
4825 th->th.th_new_place = masters_place;
4826 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4827 team->t.t_display_affinity != 1) {
4828 team->t.t_display_affinity = 1;
4831 KA_TRACE(100, (
"__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4832 "partition = [%d,%d]\n",
4833 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4834 f, masters_place, first_place, last_place));
// close: threads keep the full partition and are placed on consecutive places
// starting from the primary's place.
4838 case proc_bind_close: {
4840 int n_th = team->t.t_nproc;
// Partition size, accounting for a partition that wraps past the last mask.
4842 if (first_place <= last_place) {
4843 n_places = last_place - first_place + 1;
4845 n_places = num_masks - first_place + last_place + 1;
// Enough places: workers (f >= 1) walk the places one by one.
4847 if (n_th <= n_places) {
4848 int place = masters_place;
4849 for (f = 1; f < n_th; f++) {
4850 kmp_info_t *th = team->t.t_threads[f];
4851 KMP_DEBUG_ASSERT(th != NULL);
4853 if (place == last_place) {
4854 place = first_place;
4855 }
else if (place == (num_masks - 1)) {
4860 th->th.th_first_place = first_place;
4861 th->th.th_last_place = last_place;
4862 th->th.th_new_place = place;
4863 if (__kmp_display_affinity && place != th->th.th_current_place &&
4864 team->t.t_display_affinity != 1) {
4865 team->t.t_display_affinity = 1;
4868 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4869 "partition = [%d,%d]\n",
4870 __kmp_gtid_from_thread(team->t.t_threads[f]),
4871 team->t.t_id, f, place, first_place, last_place));
// More threads than places: S threads per place, with `rem` left-over threads
// handed out one extra per place every `gap` places.
4874 int S, rem, gap, s_count;
4875 S = n_th / n_places;
4877 rem = n_th - (S * n_places);
4878 gap = rem > 0 ? n_places / rem : n_places;
4879 int place = masters_place;
4881 for (f = 0; f < n_th; f++) {
4882 kmp_info_t *th = team->t.t_threads[f];
4883 KMP_DEBUG_ASSERT(th != NULL);
4885 th->th.th_first_place = first_place;
4886 th->th.th_last_place = last_place;
4887 th->th.th_new_place = place;
4888 if (__kmp_display_affinity && place != th->th.th_current_place &&
4889 team->t.t_display_affinity != 1) {
4890 team->t.t_display_affinity = 1;
4894 if ((s_count == S) && rem && (gap_ct == gap)) {
4896 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4898 if (place == last_place) {
4899 place = first_place;
4900 }
else if (place == (num_masks - 1)) {
4908 }
else if (s_count == S) {
4909 if (place == last_place) {
4910 place = first_place;
4911 }
else if (place == (num_masks - 1)) {
4921 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4922 "partition = [%d,%d]\n",
4923 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4924 th->th.th_new_place, first_place, last_place));
// After a full pass the walk must have come back to the primary's place.
4926 KMP_DEBUG_ASSERT(place == masters_place);
// spread: each thread receives its own sub-partition of the primary's
// partition and is placed inside it.
4930 case proc_bind_spread: {
4932 int n_th = team->t.t_nproc;
4935 if (first_place <= last_place) {
4936 n_places = last_place - first_place + 1;
4938 n_places = num_masks - first_place + last_place + 1;
4940 if (n_th <= n_places) {
// Partition is a strict subset of all places: give each thread S consecutive
// places, with `rem` extra places spread every `gap` threads.
4943 if (n_places != num_masks) {
4944 int S = n_places / n_th;
4945 int s_count, rem, gap, gap_ct;
4947 place = masters_place;
4948 rem = n_places - n_th * S;
4949 gap = rem ? n_th / rem : 1;
4952 if (update_master_only == 1)
4954 for (f = 0; f < thidx; f++) {
4955 kmp_info_t *th = team->t.t_threads[f];
4956 KMP_DEBUG_ASSERT(th != NULL);
// The thread sits on the first place of its sub-partition.
4958 th->th.th_first_place = place;
4959 th->th.th_new_place = place;
4960 if (__kmp_display_affinity && place != th->th.th_current_place &&
4961 team->t.t_display_affinity != 1) {
4962 team->t.t_display_affinity = 1;
4965 while (s_count < S) {
4966 if (place == last_place) {
4967 place = first_place;
4968 }
else if (place == (num_masks - 1)) {
4975 if (rem && (gap_ct == gap)) {
4976 if (place == last_place) {
4977 place = first_place;
4978 }
else if (place == (num_masks - 1)) {
4986 th->th.th_last_place = place;
4989 if (place == last_place) {
4990 place = first_place;
4991 }
else if (place == (num_masks - 1)) {
4998 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4999 "partition = [%d,%d], num_masks: %u\n",
5000 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
5001 f, th->th.th_new_place, th->th.th_first_place,
5002 th->th.th_last_place, num_masks));
// Partition covers all places: sub-partitions are derived from a
// floating-point position `current` advanced by a spacing computed from
// (n_places + 1) / n_th, truncated to ints for first/last.
5008 double current =
static_cast<double>(masters_place);
5010 (
static_cast<double>(n_places + 1) /
static_cast<double>(n_th));
5015 if (update_master_only == 1)
5017 for (f = 0; f < thidx; f++) {
5018 first =
static_cast<int>(current);
5019 last =
static_cast<int>(current + spacing) - 1;
5020 KMP_DEBUG_ASSERT(last >= first);
// Handle sub-partitions that run past the end of the place list.
5021 if (first >= n_places) {
5022 if (masters_place) {
5025 if (first == (masters_place + 1)) {
5026 KMP_DEBUG_ASSERT(f == n_th);
5029 if (last == masters_place) {
5030 KMP_DEBUG_ASSERT(f == (n_th - 1));
5034 KMP_DEBUG_ASSERT(f == n_th);
5039 if (last >= n_places) {
5040 last = (n_places - 1);
5045 KMP_DEBUG_ASSERT(0 <= first);
5046 KMP_DEBUG_ASSERT(n_places > first);
5047 KMP_DEBUG_ASSERT(0 <= last);
5048 KMP_DEBUG_ASSERT(n_places > last);
5049 KMP_DEBUG_ASSERT(last_place >= first_place);
5050 th = team->t.t_threads[f];
5051 KMP_DEBUG_ASSERT(th);
5052 th->th.th_first_place = first;
5053 th->th.th_new_place = place;
5054 th->th.th_last_place = last;
5055 if (__kmp_display_affinity && place != th->th.th_current_place &&
5056 team->t.t_display_affinity != 1) {
5057 team->t.t_display_affinity = 1;
5060 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5061 "partition = [%d,%d], spacing = %.4f\n",
5062 __kmp_gtid_from_thread(team->t.t_threads[f]),
5063 team->t.t_id, f, th->th.th_new_place,
5064 th->th.th_first_place, th->th.th_last_place, spacing));
5068 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
// More threads than places: each thread's sub-partition is a single place;
// S threads share a place, with `rem` extras handed out every `gap` places.
5070 int S, rem, gap, s_count;
5071 S = n_th / n_places;
5073 rem = n_th - (S * n_places);
5074 gap = rem > 0 ? n_places / rem : n_places;
5075 int place = masters_place;
5078 if (update_master_only == 1)
5080 for (f = 0; f < thidx; f++) {
5081 kmp_info_t *th = team->t.t_threads[f];
5082 KMP_DEBUG_ASSERT(th != NULL);
5084 th->th.th_first_place = place;
5085 th->th.th_last_place = place;
5086 th->th.th_new_place = place;
5087 if (__kmp_display_affinity && place != th->th.th_current_place &&
5088 team->t.t_display_affinity != 1) {
5089 team->t.t_display_affinity = 1;
5093 if ((s_count == S) && rem && (gap_ct == gap)) {
5095 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5097 if (place == last_place) {
5098 place = first_place;
5099 }
else if (place == (num_masks - 1)) {
5107 }
else if (s_count == S) {
5108 if (place == last_place) {
5109 place = first_place;
5110 }
else if (place == (num_masks - 1)) {
5119 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5120 "partition = [%d,%d]\n",
5121 __kmp_gtid_from_thread(team->t.t_threads[f]),
5122 team->t.t_id, f, th->th.th_new_place,
5123 th->th.th_first_place, th->th.th_last_place));
5125 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5133 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
5141__kmp_allocate_team(kmp_root_t *root,
int new_nproc,
int max_nproc,
5143 ompt_data_t ompt_parallel_data,
5145 kmp_proc_bind_t new_proc_bind,
5146 kmp_internal_control_t *new_icvs,
5147 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5148 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5151 int use_hot_team = !root->r.r_active;
5153 int do_place_partition = 1;
5155 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
5156 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5157 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5160#if KMP_NESTED_HOT_TEAMS
5161 kmp_hot_team_ptr_t *hot_teams;
5163 team = master->th.th_team;
5164 level = team->t.t_active_level;
5165 if (master->th.th_teams_microtask) {
5166 if (master->th.th_teams_size.nteams > 1 &&
5169 (microtask_t)__kmp_teams_master ||
5170 master->th.th_teams_level <
5177 if ((master->th.th_teams_size.nteams == 1 &&
5178 master->th.th_teams_level >= team->t.t_level) ||
5179 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5180 do_place_partition = 0;
5182 hot_teams = master->th.th_hot_teams;
5183 if (level < __kmp_hot_teams_max_level && hot_teams &&
5184 hot_teams[level].hot_team) {
5192 KMP_DEBUG_ASSERT(new_nproc == 1);
5196 if (use_hot_team && new_nproc > 1) {
5197 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5198#if KMP_NESTED_HOT_TEAMS
5199 team = hot_teams[level].hot_team;
5201 team = root->r.r_hot_team;
5204 if (__kmp_tasking_mode != tskm_immediate_exec) {
5205 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5206 "task_team[1] = %p before reinit\n",
5207 team->t.t_task_team[0], team->t.t_task_team[1]));
5211 if (team->t.t_nproc != new_nproc &&
5212 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5214 int old_nthr = team->t.t_nproc;
5215 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5220 if (do_place_partition == 0)
5221 team->t.t_proc_bind = proc_bind_default;
5225 if (team->t.t_nproc == new_nproc) {
5226 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5229 if (team->t.t_size_changed == -1) {
5230 team->t.t_size_changed = 1;
5232 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5236 kmp_r_sched_t new_sched = new_icvs->sched;
5238 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5240 __kmp_reinitialize_team(team, new_icvs,
5241 root->r.r_uber_thread->th.th_ident);
5243 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5244 team->t.t_threads[0], team));
5245 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5247#if KMP_AFFINITY_SUPPORTED
5248 if ((team->t.t_size_changed == 0) &&
5249 (team->t.t_proc_bind == new_proc_bind)) {
5250 if (new_proc_bind == proc_bind_spread) {
5251 if (do_place_partition) {
5253 __kmp_partition_places(team, 1);
5256 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5257 "proc_bind = %d, partition = [%d,%d]\n",
5258 team->t.t_id, new_proc_bind, team->t.t_first_place,
5259 team->t.t_last_place));
5261 if (do_place_partition) {
5262 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5263 __kmp_partition_places(team);
5267 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5269 }
else if (team->t.t_nproc > new_nproc) {
5271 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
5274 team->t.t_size_changed = 1;
5275 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5278 __kmp_add_threads_to_team(team, new_nproc);
5280#if KMP_NESTED_HOT_TEAMS
5281 if (__kmp_hot_teams_mode == 0) {
5284 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5285 hot_teams[level].hot_team_nth = new_nproc;
5288 for (f = new_nproc; f < team->t.t_nproc; f++) {
5289 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5290 if (__kmp_tasking_mode != tskm_immediate_exec) {
5293 team->t.t_threads[f]->th.th_task_team = NULL;
5295 __kmp_free_thread(team->t.t_threads[f]);
5296 team->t.t_threads[f] = NULL;
5298#if KMP_NESTED_HOT_TEAMS
5303 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5304 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5305 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5306 for (
int b = 0; b < bs_last_barrier; ++b) {
5307 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5308 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5310 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5315 team->t.t_nproc = new_nproc;
5317 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5318 __kmp_reinitialize_team(team, new_icvs,
5319 root->r.r_uber_thread->th.th_ident);
5322 for (f = 0; f < new_nproc; ++f) {
5323 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5328 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5329 team->t.t_threads[0], team));
5331 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5334 for (f = 0; f < team->t.t_nproc; f++) {
5335 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5336 team->t.t_threads[f]->th.th_team_nproc ==
5341 if (do_place_partition) {
5342 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5343#if KMP_AFFINITY_SUPPORTED
5344 __kmp_partition_places(team);
5348#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5349 kmp_affin_mask_t *old_mask;
5350 if (KMP_AFFINITY_CAPABLE()) {
5351 KMP_CPU_ALLOC(old_mask);
5356 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5358 int old_nproc = team->t.t_nproc;
5359 team->t.t_size_changed = 1;
5361#if KMP_NESTED_HOT_TEAMS
5362 int avail_threads = hot_teams[level].hot_team_nth;
5363 if (new_nproc < avail_threads)
5364 avail_threads = new_nproc;
5365 kmp_info_t **other_threads = team->t.t_threads;
5366 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5370 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5371 for (b = 0; b < bs_last_barrier; ++b) {
5372 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5373 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5375 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5379 if (hot_teams[level].hot_team_nth >= new_nproc) {
5382 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5383 team->t.t_nproc = new_nproc;
5387 team->t.t_nproc = hot_teams[level].hot_team_nth;
5388 hot_teams[level].hot_team_nth = new_nproc;
5390 if (team->t.t_max_nproc < new_nproc) {
5392 __kmp_reallocate_team_arrays(team, new_nproc);
5393 __kmp_reinitialize_team(team, new_icvs, NULL);
5396#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5402 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5406 for (f = team->t.t_nproc; f < new_nproc; f++) {
5407 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5408 KMP_DEBUG_ASSERT(new_worker);
5409 team->t.t_threads[f] = new_worker;
5412 (
"__kmp_allocate_team: team %d init T#%d arrived: "
5413 "join=%llu, plain=%llu\n",
5414 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5415 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5416 team->t.t_bar[bs_plain_barrier].b_arrived));
5420 kmp_balign_t *balign = new_worker->th.th_bar;
5421 for (b = 0; b < bs_last_barrier; ++b) {
5422 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5423 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5424 KMP_BARRIER_PARENT_FLAG);
5426 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5432#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5433 if (KMP_AFFINITY_CAPABLE()) {
5435 __kmp_set_system_affinity(old_mask, TRUE);
5436 KMP_CPU_FREE(old_mask);
5439#if KMP_NESTED_HOT_TEAMS
5442 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5445 __kmp_add_threads_to_team(team, new_nproc);
5449 __kmp_initialize_team(team, new_nproc, new_icvs,
5450 root->r.r_uber_thread->th.th_ident);
5453 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5454 for (f = 0; f < team->t.t_nproc; ++f)
5455 __kmp_initialize_info(team->t.t_threads[f], team, f,
5456 __kmp_gtid_from_tid(f, team));
5459 kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;
5460 for (f = old_nproc; f < team->t.t_nproc; ++f)
5461 team->t.t_threads[f]->th.th_task_state = old_state;
5464 for (f = 0; f < team->t.t_nproc; ++f) {
5465 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5466 team->t.t_threads[f]->th.th_team_nproc ==
5471 if (do_place_partition) {
5472 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5473#if KMP_AFFINITY_SUPPORTED
5474 __kmp_partition_places(team);
5479 kmp_info_t *master = team->t.t_threads[0];
5480 if (master->th.th_teams_microtask) {
5481 for (f = 1; f < new_nproc; ++f) {
5483 kmp_info_t *thr = team->t.t_threads[f];
5484 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5485 thr->th.th_teams_level = master->th.th_teams_level;
5486 thr->th.th_teams_size = master->th.th_teams_size;
5489#if KMP_NESTED_HOT_TEAMS
5493 for (f = 1; f < new_nproc; ++f) {
5494 kmp_info_t *thr = team->t.t_threads[f];
5496 kmp_balign_t *balign = thr->th.th_bar;
5497 for (b = 0; b < bs_last_barrier; ++b) {
5498 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5499 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5501 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5509 __kmp_alloc_argv_entries(argc, team, TRUE);
5510 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5514 KF_TRACE(10, (
" hot_team = %p\n", team));
5517 if (__kmp_tasking_mode != tskm_immediate_exec) {
5518 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5519 "task_team[1] = %p after reinit\n",
5520 team->t.t_task_team[0], team->t.t_task_team[1]));
5525 __ompt_team_assign_id(team, ompt_parallel_data);
5535 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5538 if (team->t.t_max_nproc >= max_nproc) {
5540 __kmp_team_pool = team->t.t_next_pool;
5542 if (max_nproc > 1 &&
5543 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5545 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5550 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5552 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5553 "task_team[1] %p to NULL\n",
5554 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5555 team->t.t_task_team[0] = NULL;
5556 team->t.t_task_team[1] = NULL;
5559 __kmp_alloc_argv_entries(argc, team, TRUE);
5560 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5563 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5564 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5567 for (b = 0; b < bs_last_barrier; ++b) {
5568 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5570 team->t.t_bar[b].b_master_arrived = 0;
5571 team->t.t_bar[b].b_team_arrived = 0;
5576 team->t.t_proc_bind = new_proc_bind;
5578 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5582 __ompt_team_assign_id(team, ompt_parallel_data);
5594 team = __kmp_reap_team(team);
5595 __kmp_team_pool = team;
5600 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5603 team->t.t_max_nproc = max_nproc;
5604 if (max_nproc > 1 &&
5605 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5607 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5612 __kmp_allocate_team_arrays(team, max_nproc);
5614 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5615 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5617 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5619 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5620 team->t.t_task_team[0] = NULL;
5622 team->t.t_task_team[1] = NULL;
5625 if (__kmp_storage_map) {
5626 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5630 __kmp_alloc_argv_entries(argc, team, FALSE);
5631 team->t.t_argc = argc;
5634 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5635 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5638 for (b = 0; b < bs_last_barrier; ++b) {
5639 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5641 team->t.t_bar[b].b_master_arrived = 0;
5642 team->t.t_bar[b].b_team_arrived = 0;
5647 team->t.t_proc_bind = new_proc_bind;
5650 __ompt_team_assign_id(team, ompt_parallel_data);
5651 team->t.ompt_serialized_team_info = NULL;
5656 KA_TRACE(20, (
"__kmp_allocate_team: done creating a new team %d.\n",
// Releases `team` after the parallel region that used it has finished.
//   root   - root whose r_hot_team is compared with `team` to decide whether
//            the team is kept as a hot team.
//   team   - team being released; t_nproc <= t_max_nproc and a non-NULL
//            t_threads array are debug-asserted.
//   master - only present when USE_NESTED_HOT_ARG expands its argument; used
//            for the nested hot-team lookup and when freeing task teams.
// For a team that is not a hot team the visible code waits for the workers to
// become reapable, detaches and frees both task teams, hands workers
// 1..t_nproc-1 to __kmp_free_thread and pushes the team on __kmp_team_pool.
// NOTE(review): several statements of this function (declarations, closing
// braces, #else/#endif lines) are not visible in this listing.
5667void __kmp_free_team(kmp_root_t *root,
5668 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5670 KA_TRACE(20, (
"__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5674 KMP_DEBUG_ASSERT(root);
5675 KMP_DEBUG_ASSERT(team);
5676 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5677 KMP_DEBUG_ASSERT(team->t.t_threads);
// The root's cached hot team is never dismantled here.
5679 int use_hot_team = team == root->r.r_hot_team;
5680#if KMP_NESTED_HOT_TEAMS
// Nested hot teams: derive the nesting level from t_active_level, with
// adjustments when the master is inside a teams construct.
5683 level = team->t.t_active_level - 1;
5684 if (master->th.th_teams_microtask) {
5685 if (master->th.th_teams_size.nteams > 1) {
5689 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5690 master->th.th_teams_level == team->t.t_level) {
5696 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5698 if (level < __kmp_hot_teams_max_level) {
5699 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
// Reset per-region state stored on the team (microtask pointer and copy-in
// counter).
5706 TCW_SYNC_PTR(team->t.t_pkfn,
5709 team->t.t_copyin_counter = 0;
// Everything below, up to the push onto __kmp_team_pool, applies to teams
// that are not hot teams.
5714 if (!use_hot_team) {
5715 if (__kmp_tasking_mode != tskm_immediate_exec) {
// Wait until every worker reports KMP_SAFE_TO_REAP. A worker that is no
// longer alive is marked reapable here; a worker sleeping on its fork/join
// b_go flag is resumed.
5717 for (f = 1; f < team->t.t_nproc; ++f) {
5718 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5719 kmp_info_t *th = team->t.t_threads[f];
5720 volatile kmp_uint32 *state = &th->th.th_reap_state;
5721 while (*state != KMP_SAFE_TO_REAP) {
5725 if (!__kmp_is_thread_alive(th, &ecode)) {
5726 *state = KMP_SAFE_TO_REAP;
5731 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5732 if (fl.is_sleeping())
5733 fl.resume(__kmp_gtid_from_thread(th));
// Drop both task teams: clear each thread's th_task_team pointer, free the
// task team and NULL the team's slot.
5740 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5741 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5742 if (task_team != NULL) {
5743 for (f = 0; f < team->t.t_nproc; ++f) {
5744 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5745 team->t.t_threads[f]->th.th_task_team = NULL;
5749 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5750 __kmp_get_gtid(), task_team, team->t.t_id));
5751#if KMP_NESTED_HOT_TEAMS
5752 __kmp_free_task_team(master, task_team);
5754 team->t.t_task_team[tt_idx] = NULL;
// Detach the team from its parent and reset its nesting levels.
5760 team->t.t_parent = NULL;
5761 team->t.t_level = 0;
5762 team->t.t_active_level = 0;
// Hand the workers (tid >= 1) to __kmp_free_thread. With the distributed
// barrier their th_used_in_team field is updated first via compare-and-store.
5765 for (f = 1; f < team->t.t_nproc; ++f) {
5766 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5767 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5768 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5771 __kmp_free_thread(team->t.t_threads[f]);
// Distributed barrier only: release the workers, resume those recorded as
// sleeping (skipped when blocktime is KMP_MAX_BLOCKTIME), then wait until each
// worker's th_used_in_team reads 0.
5774 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5777 team->t.b->go_release();
5778 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5779 for (f = 1; f < team->t.t_nproc; ++f) {
5780 if (team->t.b->sleep[f].sleep) {
5781 __kmp_atomic_resume_64(
5782 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5783 (kmp_atomic_flag_64<> *)NULL);
5788 for (
int f = 1; f < team->t.t_nproc; ++f) {
5789 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
// Forget the worker pointers; slot 0 is left untouched.
5795 for (f = 1; f < team->t.t_nproc; ++f) {
5796 team->t.t_threads[f] = NULL;
// Free the distributed-barrier object for teams that can hold more than one
// thread.
5799 if (team->t.t_max_nproc > 1 &&
5800 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5801 distributedBarrier::deallocate(team->t.b);
// Push the team on the head of __kmp_team_pool.
5806 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5807 __kmp_team_pool = (
volatile kmp_team_t *)team;
// NOTE(review): presumably the hot-team branch (the enclosing `else` is not
// visible here). If worker 1 is the root of its own contention group, every
// worker pops its top cg node, decrements that node's thread count and, when a
// parent node remains, restores thread_limit from it.
5810 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5811 team->t.t_threads[1]->th.th_cg_roots);
5812 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5814 for (f = 1; f < team->t.t_nproc; ++f) {
5815 kmp_info_t *thr = team->t.t_threads[f];
5816 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5817 thr->th.th_cg_roots->cg_root == thr);
5819 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5820 thr->th.th_cg_roots = tmp->up;
5821 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5822 " up to node %p. cg_nthreads was %d\n",
5823 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
// `i` holds the count before the decrement.
5824 int i = tmp->cg_nthreads--;
5829 if (thr->th.th_cg_roots)
5830 thr->th.th_current_task->td_icvs.thread_limit =
5831 thr->th.th_cg_roots->cg_thread_limit;
// Frees the storage owned by `team` and remembers its successor in the pool.
// The visible code frees the team arrays and, when t_argv does not point at
// the inline buffer t_inline_argv, the separately allocated argv array.
// NOTE(review): `team` is dereferenced for t_next_pool before
// KMP_DEBUG_ASSERT(team) runs, so that assert cannot catch a NULL argument.
// NOTE(review): the return statement is not visible in this listing; a caller
// in this file assigns the result to __kmp_team_pool, so presumably next_pool
// is returned - confirm.
5840kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
// Captured up front because the team's arrays are released below.
5841 kmp_team_t *next_pool = team->t.t_next_pool;
5843 KMP_DEBUG_ASSERT(team);
5844 KMP_DEBUG_ASSERT(team->t.t_dispatch);
5845 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
5846 KMP_DEBUG_ASSERT(team->t.t_threads);
5847 KMP_DEBUG_ASSERT(team->t.t_argv);
5852 __kmp_free_team_arrays(team);
// t_argv may alias the inline storage inside the team; only a separately
// allocated array is freed.
5853 if (team->t.t_argv != &team->t.t_inline_argv[0])
5854 __kmp_free((
void *)team->t.t_argv);
// Puts worker `this_th` back on the free thread pool (__kmp_thread_pool).
// Visible steps: reset per-barrier state, clear the team/root/dispatch
// pointers, unwind the thread's contention-group root list, free its implicit
// task, insert it into the pool in ascending gtid order and decrement
// __kmp_nth.
// NOTE(review): the trace below dereferences this_th before
// KMP_DEBUG_ASSERT(this_th) is evaluated.
5886void __kmp_free_thread(kmp_info_t *this_th) {
5890 KA_TRACE(20, (
"__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5891 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5893 KMP_DEBUG_ASSERT(this_th);
// Reset barrier bookkeeping: a thread still flagged as waiting on its parent's
// flag is switched to its own flag, and team / leaf-kid data are cleared.
5898 kmp_balign_t *balign = this_th->th.th_bar;
5899 for (b = 0; b < bs_last_barrier; ++b) {
5900 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5901 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5902 balign[b].bb.team = NULL;
5903 balign[b].bb.leaf_kids = 0;
5905 this_th->th.th_task_state = 0;
5906 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
// The thread no longer belongs to any team or root.
5909 TCW_PTR(this_th->th.th_team, NULL);
5910 TCW_PTR(this_th->th.th_root, NULL);
5911 TCW_PTR(this_th->th.th_dispatch, NULL);
// Unwind the contention-group list: decrement the thread count on the current
// node; a node rooted at this thread is popped (its count is asserted to be
// 0), otherwise the list pointer is cleared.
// NOTE(review): the statements that release nodes are not visible here.
5913 while (this_th->th.th_cg_roots) {
5914 this_th->th.th_cg_roots->cg_nthreads--;
5915 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5916 " %p of thread %p to %d\n",
5917 this_th, this_th->th.th_cg_roots,
5918 this_th->th.th_cg_roots->cg_root,
5919 this_th->th.th_cg_roots->cg_nthreads));
5920 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5921 if (tmp->cg_root == this_th) {
5922 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5924 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5925 this_th->th.th_cg_roots = tmp->up;
5928 if (tmp->cg_nthreads == 0) {
5931 this_th->th.th_cg_roots = NULL;
5941 __kmp_free_implicit_task(this_th);
5942 this_th->th.th_current_task = NULL;
// __kmp_thread_pool_insert_pt caches the previous insertion point. It is
// discarded when its gtid is greater than this thread's, so the scan then
// restarts from the pool head.
5946 gtid = this_th->th.th_info.ds.ds_gtid;
5947 if (__kmp_thread_pool_insert_pt != NULL) {
5948 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5949 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5950 __kmp_thread_pool_insert_pt = NULL;
5959 if (__kmp_thread_pool_insert_pt != NULL) {
5960 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5962 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
// Advance to the first pool entry whose gtid is not smaller than ours.
5964 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5965 scan = &((*scan)->th.th_next_pool))
// Link the thread in at that position and remember it as the new insertion
// point.
5970 TCW_PTR(this_th->th.th_next_pool, *scan);
5971 __kmp_thread_pool_insert_pt = *scan = this_th;
5972 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5973 (this_th->th.th_info.ds.ds_gtid <
5974 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5975 TCW_4(this_th->th.th_in_pool, TRUE);
// With the thread's suspend mutex held, count it in
// __kmp_thread_pool_active_nth if it is currently marked active.
5976 __kmp_suspend_initialize_thread(this_th);
5977 __kmp_lock_suspend_mx(this_th);
5978 if (this_th->th.th_active == TRUE) {
5979 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5980 this_th->th.th_active_in_pool = TRUE;
5984 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5987 __kmp_unlock_suspend_mx(this_th);
5989 TCW_4(__kmp_nth, __kmp_nth - 1);
5991#ifdef KMP_ADJUST_BLOCKTIME
// Clear __kmp_zero_bt once the thread count no longer exceeds
// __kmp_avail_proc (only when __kmp_env_blocktime is not set).
5994 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5995 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5996 if (__kmp_nth <= __kmp_avail_proc) {
5997 __kmp_zero_bt = FALSE;
// Main routine of a worker thread.
// Until __kmp_global.g.g_done is set the thread waits in __kmp_fork_barrier;
// when it then has a team whose t_pkfn is non-NULL it calls the team's
// t_invoke routine and afterwards __kmp_join_barrier. OMPD/OMPT begin and end
// hooks are issued around the loop, and the thread's task-team pointer and
// per-gtid common data are cleared on exit.
//   this_thr - descriptor of the calling worker; its ds_gtid is used as gtid.
// NOTE(review): the return statement and the conditional-compilation guards
// around the OMPT/OMPD sections are not visible in this listing.
6007void *__kmp_launch_thread(kmp_info_t *this_thr) {
6008#if OMP_PROFILING_SUPPORT
// Time-trace profiling is initialised only when LIBOMPTARGET_PROFILE is set
// in the environment.
6009 ProfileTraceFile = getenv(
"LIBOMPTARGET_PROFILE");
6011 if (ProfileTraceFile)
6012 llvm::timeTraceProfilerInitialize(500 ,
"libomptarget");
6015 int gtid = this_thr->th.th_info.ds.ds_gtid;
6017 kmp_team_t **
volatile pteam;
6020 KA_TRACE(10, (
"__kmp_launch_thread: T#%d start\n", gtid));
6022 if (__kmp_env_consistency_check) {
6023 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
6027 if (ompd_state & OMPD_ENABLE_BP)
6028 ompd_bp_thread_begin();
// OMPT: initialise this thread's tool-visible state and report thread-begin
// for a worker before going idle.
6032 ompt_data_t *thread_data =
nullptr;
6033 if (ompt_enabled.enabled) {
6034 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
6035 *thread_data = ompt_data_none;
6037 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6038 this_thr->th.ompt_thread_info.wait_id = 0;
6039 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
6040 this_thr->th.ompt_thread_info.parallel_flags = 0;
6041 if (ompt_enabled.ompt_callback_thread_begin) {
6042 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
6043 ompt_thread_worker, thread_data);
6045 this_thr->th.ompt_thread_info.state = ompt_state_idle;
// Work loop: runs until global shutdown is flagged.
6050 while (!TCR_4(__kmp_global.g.g_done)) {
6051 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6055 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
// Wait here until the thread is released from the fork barrier.
6058 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6061 if (ompt_enabled.enabled) {
6062 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6066 pteam = &this_thr->th.th_team;
// Only run work when a team is attached and shutdown has not been flagged in
// the meantime.
6069 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6071 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6074 (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6075 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6076 (*pteam)->t.t_pkfn));
6078 updateHWFPControl(*pteam);
6081 if (ompt_enabled.enabled) {
6082 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
// Run this thread's share of the parallel region.
6086 rc = (*pteam)->t.t_invoke(gtid);
6090 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6091 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6092 (*pteam)->t.t_pkfn));
6095 if (ompt_enabled.enabled) {
6097 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6099 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6103 __kmp_join_barrier(gtid);
// NOTE(review): the value read here is discarded in the visible code.
6106 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
6109 if (ompd_state & OMPD_ENABLE_BP)
6110 ompd_bp_thread_end();
6114 if (ompt_enabled.ompt_callback_thread_end) {
6115 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6119 this_thr->th.th_task_team = NULL;
6121 __kmp_common_destroy_gtid(gtid);
6123 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
6126#if OMP_PROFILING_SUPPORT
6127 llvm::timeTraceProfilerFinishThread();
6134void __kmp_internal_end_dest(
void *specific_gtid) {
6137 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, >id);
6139 KA_TRACE(30, (
"__kmp_internal_end_dest: T#%d\n", gtid));
6143 __kmp_internal_end_thread(gtid);
// Unix dynamic-library builds only: a function carrying the compiler's
// `destructor` attribute that forwards to __kmp_internal_end_atexit().
6146#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6148__attribute__((destructor))
void __kmp_internal_end_dtor(
void) {
6149 __kmp_internal_end_atexit();
// Process-exit hook: shuts the library down with
// __kmp_internal_end_library(-1), where the negative argument makes the callee
// look the caller's gtid up itself, and then calls __kmp_close_console().
// NOTE(review): any conditional-compilation guard around the console call is
// not visible in this listing.
6156void __kmp_internal_end_atexit(
void) {
6157 KA_TRACE(30, (
"__kmp_internal_end_atexit\n"));
6181 __kmp_internal_end_library(-1);
6183 __kmp_close_console();
// Permanently destroys `thread`: wakes it out of the fork barrier, reaps the
// underlying worker, frees the per-thread resources visible below and clears
// its slot in __kmp_threads.
//   thread  - descriptor to destroy; must not be NULL.
//   is_root - not referenced in the visible lines; NOTE(review): presumably
//             it guards the release/reap step for non-root threads - confirm.
6187static void __kmp_reap_thread(kmp_info_t *thread,
int is_root) {
6192 KMP_DEBUG_ASSERT(thread != NULL);
6194 gtid = thread->th.th_info.ds.ds_gtid;
// With a finite blocktime the worker may be asleep in the fork barrier, so it
// is released explicitly: through th_used_in_team plus a resume for the
// distributed barrier, otherwise through its fork/join b_go flag.
6197 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
6200 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
6202 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6204 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6206 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
6210 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6212 __kmp_release_64(&flag);
6217 __kmp_reap_worker(thread);
// Keep the pool's active-thread counter consistent.
6229 if (thread->th.th_active_in_pool) {
6230 thread->th.th_active_in_pool = FALSE;
6231 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6232 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6236 __kmp_free_implicit_task(thread);
6240 __kmp_free_fast_memory(thread);
6243 __kmp_suspend_uninitialize_thread(thread);
// Remove the thread from the global thread table.
6245 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6246 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6251#ifdef KMP_ADJUST_BLOCKTIME
// Clear __kmp_zero_bt once the thread count no longer exceeds
// __kmp_avail_proc (only when __kmp_env_blocktime is not set).
6254 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6255 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6256 if (__kmp_nth <= __kmp_avail_proc) {
6257 __kmp_zero_bt = FALSE;
// Free optional per-thread allocations; each pointer is reset to NULL after
// being released.
6263 if (__kmp_env_consistency_check) {
6264 if (thread->th.th_cons) {
6265 __kmp_free_cons_stack(thread->th.th_cons);
6266 thread->th.th_cons = NULL;
6270 if (thread->th.th_pri_common != NULL) {
6271 __kmp_free(thread->th.th_pri_common);
6272 thread->th.th_pri_common = NULL;
6275 if (thread->th.th_task_state_memo_stack != NULL) {
6276 __kmp_free(thread->th.th_task_state_memo_stack);
6277 thread->th.th_task_state_memo_stack = NULL;
6281 if (thread->th.th_local.bget_data != NULL) {
6282 __kmp_finalize_bget(thread);
6286#if KMP_AFFINITY_SUPPORTED
6287 if (thread->th.th_affin_mask != NULL) {
6288 KMP_CPU_FREE(thread->th.th_affin_mask);
6289 thread->th.th_affin_mask = NULL;
6293#if KMP_USE_HIER_SCHED
6294 if (thread->th.th_hier_bar_data != NULL) {
6295 __kmp_free(thread->th.th_hier_bar_data);
6296 thread->th.th_hier_bar_data = NULL;
// The thread's private serial team is destroyed with it.
6300 __kmp_reap_team(thread->th.th_serial_team);
6301 thread->th.th_serial_team = NULL;
// Frees the entries of the two ITT frame-domain hash tables
// (__kmp_itt_region_domains and __kmp_itt_barrier_domains) with
// __kmp_thread_free on behalf of thread `th`. A table is walked only when its
// count is positive; each of the KMP_MAX_FRAME_DOMAINS buckets is a chain
// linked through next_in_bucket.
// NOTE(review): the loop that advances along each chain is not visible in
// this listing.
6308static void __kmp_itthash_clean(kmp_info_t *th) {
6310 if (__kmp_itt_region_domains.count > 0) {
6311 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6312 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
// Read the successor before the entry is freed.
6314 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6315 __kmp_thread_free(th, bucket);
6320 if (__kmp_itt_barrier_domains.count > 0) {
6321 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6322 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
// Read the successor before the entry is freed.
6324 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6325 __kmp_thread_free(th, bucket);
// Performs the actual runtime teardown. The callers visible in this file take
// __kmp_initz_lock and __kmp_forkjoin_lock before calling it.
// Visible steps: unregister the library, reclaim dead roots, scan the roots
// for one that is still active, set g_done, reap the monitor thread, and -
// apparently when no root is active - drain __kmp_thread_pool with
// __kmp_reap_thread, drain __kmp_team_pool with __kmp_reap_team, reap task
// teams, wait for th_blocking to clear on the remaining threads and clear
// __kmp_init_common. __kmp_init_gtid is cleared at the end.
// NOTE(review): the if/else structure and the preprocessor guards around the
// monitor sections are not fully visible in this listing.
6333static void __kmp_internal_end(
void) {
6337 __kmp_unregister_library();
6344 __kmp_reclaim_dead_roots();
// Look for a root that is still active; `i` is compared with the capacity
// afterwards to tell whether one was found.
6348 for (i = 0; i < __kmp_threads_capacity; i++)
6350 if (__kmp_root[i]->r.r_active)
6353 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6355 if (i < __kmp_threads_capacity) {
// Reap the monitor thread under its bootstrap lock.
6367 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6368 if (TCR_4(__kmp_init_monitor)) {
6369 __kmp_reap_monitor(&__kmp_monitor);
6370 TCW_4(__kmp_init_monitor, 0);
6372 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6373 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
// Sanity check: no registered root may still be active on this path.
6379 for (i = 0; i < __kmp_threads_capacity; i++) {
6380 if (__kmp_root[i]) {
6383 KMP_ASSERT(!__kmp_root[i]->r.r_active);
// Destroy every pooled worker thread.
6392 while (__kmp_thread_pool != NULL) {
6394 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6395 __kmp_thread_pool = thread->th.th_next_pool;
6397 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6398 thread->th.th_next_pool = NULL;
6399 thread->th.th_in_pool = FALSE;
6400 __kmp_reap_thread(thread, 0);
6402 __kmp_thread_pool_insert_pt = NULL;
// Destroy every pooled team.
6405 while (__kmp_team_pool != NULL) {
6407 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6408 __kmp_team_pool = team->t.t_next_pool;
6410 team->t.t_next_pool = NULL;
6411 __kmp_reap_team(team);
6414 __kmp_reap_task_teams();
// Wait for any thread whose th_blocking flag is still set.
6421 for (i = 0; i < __kmp_threads_capacity; i++) {
6422 kmp_info_t *thr = __kmp_threads[i];
6423 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6428 for (i = 0; i < __kmp_threads_capacity; ++i) {
6435 TCW_SYNC_4(__kmp_init_common, FALSE);
6437 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
// Reap the monitor thread on this path as well.
6445 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6446 if (TCR_4(__kmp_init_monitor)) {
6447 __kmp_reap_monitor(&__kmp_monitor);
6448 TCW_4(__kmp_init_monitor, 0);
6450 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6451 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6454 TCW_4(__kmp_init_gtid, FALSE);
// Library-wide shutdown entry point.
//   gtid_req - gtid of the caller when >= 0; otherwise the gtid is looked up
//              with __kmp_gtid_get_specific().
// Visible behaviour: handles the "abort in progress" and "already finished /
// never initialised" cases first, shuts down the hidden helper team if it was
// started, then dispatches on the caller's gtid. For a root (uber) thread
// whose root is still active it sets g_abort = -1 and g_done and unregisters
// the library; otherwise it cleans the ITT hash tables and unregisters the
// root. After that, with __kmp_initz_lock and __kmp_forkjoin_lock held, it
// re-checks the abort/done state and calls __kmp_internal_end(), then closes
// the console and finalises the allocator.
// NOTE(review): the return statements of the early-exit branches are not
// visible in this listing.
6463void __kmp_internal_end_library(
int gtid_req) {
// Unlocked fast checks; they are repeated below once __kmp_initz_lock is held.
6470 if (__kmp_global.g.g_abort) {
6471 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6475 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6476 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
// Stop the hidden helper team: flag it done, release its main thread and wait
// for the helper threads to finish de-initialising.
6481 if (TCR_4(__kmp_init_hidden_helper) &&
6482 !TCR_4(__kmp_hidden_helper_team_done)) {
6483 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6485 __kmp_hidden_helper_main_thread_release();
6487 __kmp_hidden_helper_threads_deinitz_wait();
6493 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6495 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
// Dispatch on the kind of caller.
6496 if (gtid == KMP_GTID_SHUTDOWN) {
6497 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6498 "already shutdown\n"));
6500 }
else if (gtid == KMP_GTID_MONITOR) {
6501 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6502 "registered, or system shutdown\n"));
6504 }
else if (gtid == KMP_GTID_DNE) {
6505 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6508 }
else if (KMP_UBER_GTID(gtid)) {
// Root thread: a still-active root turns the shutdown into an abort.
6510 if (__kmp_root[gtid]->r.r_active) {
6511 __kmp_global.g.g_abort = -1;
6512 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6513 __kmp_unregister_library();
6515 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6519 __kmp_itthash_clean(__kmp_threads[gtid]);
6522 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6523 __kmp_unregister_root_current_thread(gtid);
6530#ifdef DUMP_DEBUG_ON_EXIT
6531 if (__kmp_debug_buf)
6532 __kmp_dump_debug_buffer();
6537 __kmp_unregister_library();
// Serialise with initialisation, then repeat the abort/done checks under the
// lock.
6542 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6545 if (__kmp_global.g.g_abort) {
6546 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6548 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6551 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6552 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
// The teardown itself runs with the fork/join lock held as well; the locks
// are released in reverse order of acquisition.
6561 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6564 __kmp_internal_end();
6566 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6567 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6569 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6571#ifdef DUMP_DEBUG_ON_EXIT
6572 if (__kmp_debug_buf)
6573 __kmp_dump_debug_buffer();
6577 __kmp_close_console();
6580 __kmp_fini_allocator();
// Per-thread shutdown entry point.
//   gtid_req - gtid of the caller when >= 0; otherwise the gtid is looked up
//              with __kmp_gtid_get_specific().
// Visible behaviour: handles the "abort in progress" and "already finished /
// never initialised" cases, shuts down the hidden helper team if it was
// started, then dispatches on the caller's gtid. A root (uber) thread with an
// active root triggers an abort (g_abort = -1, g_done set), otherwise its root
// is unregistered. A worker only has its th_task_team pointer cleared. With
// __kmp_initz_lock and __kmp_forkjoin_lock held, the full teardown through
// __kmp_internal_end() runs only if the scan over all gtids finds no remaining
// uber thread.
// NOTE(review): the return statements of the early-exit branches are not
// visible in this listing.
6584void __kmp_internal_end_thread(
int gtid_req) {
// Unlocked fast checks; they are repeated below once __kmp_initz_lock is held.
6593 if (__kmp_global.g.g_abort) {
6594 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6598 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6599 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
// Stop the hidden helper team: flag it done, release its main thread and wait
// for the helper threads to finish de-initialising.
6604 if (TCR_4(__kmp_init_hidden_helper) &&
6605 !TCR_4(__kmp_hidden_helper_team_done)) {
6606 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6608 __kmp_hidden_helper_main_thread_release();
6610 __kmp_hidden_helper_threads_deinitz_wait();
6617 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6619 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
// Dispatch on the kind of caller.
6620 if (gtid == KMP_GTID_SHUTDOWN) {
6621 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6622 "already shutdown\n"));
6624 }
else if (gtid == KMP_GTID_MONITOR) {
6625 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6626 "registered, or system shutdown\n"));
6628 }
else if (gtid == KMP_GTID_DNE) {
6629 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6633 }
else if (KMP_UBER_GTID(gtid)) {
// Root thread: a still-active root turns the shutdown into an abort.
6635 if (__kmp_root[gtid]->r.r_active) {
6636 __kmp_global.g.g_abort = -1;
6637 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6639 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6643 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6645 __kmp_unregister_root_current_thread(gtid);
// Worker thread: only its task-team pointer is cleared here.
6649 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6652 __kmp_threads[gtid]->th.th_task_team = NULL;
6656 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6662 if (__kmp_pause_status != kmp_hard_paused)
6666 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
// Serialise with initialisation, then repeat the abort/done checks under the
// lock.
6671 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6674 if (__kmp_global.g.g_abort) {
6675 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6677 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6680 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6681 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6692 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
// If any uber thread is still registered, drop both locks instead of tearing
// the runtime down.
6694 for (i = 0; i < __kmp_threads_capacity; ++i) {
6695 if (KMP_UBER_GTID(i)) {
6698 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6699 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6700 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
// No uber thread left: perform the full teardown.
6707 __kmp_internal_end();
6709 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6710 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6712 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6714#ifdef DUMP_DEBUG_ON_EXIT
6715 if (__kmp_debug_buf)
6716 __kmp_dump_debug_buffer();
// Library registration state.
// __kmp_registration_flag: set by __kmp_register_library_startup() to
// 0xCAFE0000 combined with the low 16 bits of a time reading; reset to 0 by
// __kmp_unregister_library().
6723static long __kmp_registration_flag = 0;
// __kmp_registration_str: formatted at startup as "%p-%lx-%s" from the flag's
// address, the flag's value and KMP_LIBRARY_FILE; freed and reset to NULL by
// __kmp_unregister_library().
6725static char *__kmp_registration_str = NULL;
// Builds the name under which this process registers its copy of the runtime.
// Unix (non-Darwin) dynamic-library builds use "__KMP_REGISTERED_LIB_%d_%d",
// whose first value is the pid (the second argument is not visible in this
// listing); all other builds use "__KMP_REGISTERED_LIB_%d" with the pid.
// The string comes from __kmp_str_format; the callers in this file release it
// with KMP_INTERNAL_FREE.
6728static inline char *__kmp_reg_status_name() {
6734#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
6735 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d_%d", (
int)getpid(),
6738 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d", (
int)getpid());
6742#if defined(KMP_USE_SHM)
// Path of the temporary registration file. __kmp_register_library_startup()
// assigns it when it falls back to a mkstemp() file instead of a
// shared-memory object; __kmp_unregister_library() reads, unlinks and frees
// it.
6744char *temp_reg_status_file_name =
nullptr;
// Registers this copy of the runtime under the per-process name from
// __kmp_reg_status_name() and checks for another copy loaded in the process.
// The registration string is "<flag address>-<flag value>-<library file>".
// With KMP_USE_SHM it is stored in a POSIX shared-memory object named
// "/<name>", falling back to a mkstemp() file under /tmp when shm_open fails
// for a reason other than EEXIST; otherwise it is stored in an environment
// variable.
// If the value read back differs from our own string it is split at '-' and
// parsed. When the recorded flag address is mapped and still holds the
// recorded value, a fatal DuplicateLibrary error is raised unless
// KMP_DUPLICATE_LIB_OK is true; in the other visible case the stale
// registration is removed.
// NOTE(review): the retry loop, the neighbor classification and several
// declarations are not visible in this listing.
6747void __kmp_register_library_startup(
void) {
6749 char *name = __kmp_reg_status_name();
6755#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6756 __kmp_initialize_system_tick();
// Build the flag value (0xCAFE in the upper half, low 16 bits of a time
// reading in the lower half) and the registration string.
6758 __kmp_read_system_time(&time.dtime);
6759 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6760 __kmp_registration_str =
6761 __kmp_str_format(
"%p-%lx-%s", &__kmp_registration_flag,
6762 __kmp_registration_flag, KMP_LIBRARY_FILE);
6764 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6765 __kmp_registration_str));
6771#if defined(KMP_USE_SHM)
6772 char *shm_name = __kmp_str_format(
"/%s", name);
6773 int shm_preexist = 0;
// Try to create the object exclusively; EEXIST means a registration already
// exists, so it is opened instead.
6775 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6776 if ((fd1 == -1) && (errno == EEXIST)) {
6779 fd1 = shm_open(shm_name, O_RDWR, 0666);
6782 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open SHM"), KMP_ERR(0),
6788 }
else if (fd1 == -1) {
// Any other shm_open failure: fall back to a temporary file and remember its
// name for unregistration.
6793 char *temp_file_name = __kmp_str_format(
"/tmp/%sXXXXXX", name);
6794 fd1 = mkstemp(temp_file_name);
6797 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open TEMP"), KMP_ERR(errno),
6800 temp_reg_status_file_name = temp_file_name;
// A newly created object is sized to SHM_SIZE before it is mapped.
6802 if (shm_preexist == 0) {
6804 if (ftruncate(fd1, SHM_SIZE) == -1) {
6806 __kmp_fatal(KMP_MSG(FunctionError,
"Can't set size of SHM"),
6807 KMP_ERR(errno), __kmp_msg_null);
6811 (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
6812 if (data1 == MAP_FAILED) {
6814 __kmp_fatal(KMP_MSG(FunctionError,
"Can't map SHM"), KMP_ERR(errno),
// Publish our string only when we created the object, then read back
// whatever the object holds.
6817 if (shm_preexist == 0) {
6818 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6821 value = __kmp_str_format(
"%s", data1);
6822 munmap(data1, SHM_SIZE);
// Environment-variable variant: set the variable (last argument 0) and read
// it back.
6826 __kmp_env_set(name, __kmp_registration_str, 0);
6828 value = __kmp_env_get(name);
// Reading back our own string means the registration succeeded.
6831 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
// Otherwise split the foreign value into flag address, flag value and file
// name.
6838 char *flag_addr_str = NULL;
6839 char *flag_val_str = NULL;
6840 char const *file_name = NULL;
6841 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6842 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6845 unsigned long *flag_addr = 0;
6846 unsigned long flag_val = 0;
6847 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void **, &flag_addr));
6848 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6849 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
// The recorded flag is checked only if its address is mapped in this process.
6853 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6867 file_name =
"unknown library";
// A second copy is fatal unless KMP_DUPLICATE_LIB_OK is true.
6872 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6873 if (!__kmp_str_match_true(duplicate_ok)) {
6875 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6876 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6878 KMP_INTERNAL_FREE(duplicate_ok);
6879 __kmp_duplicate_library_ok = 1;
6884#if defined(KMP_USE_SHM)
// NOTE(review): presumably the stale-registration case - the old object or
// variable is removed; confirm against the full source.
6886 shm_unlink(shm_name);
6889 __kmp_env_unset(name);
6893 KMP_DEBUG_ASSERT(0);
6897 KMP_INTERNAL_FREE((
void *)value);
6898#if defined(KMP_USE_SHM)
6899 KMP_INTERNAL_FREE((
void *)shm_name);
6902 KMP_INTERNAL_FREE((
void *)name);
// Removes the registration created by __kmp_register_library_startup().
// The stored value is read back from the shared-memory object, from the
// temporary fallback file, or from the environment variable in
// non-KMP_USE_SHM builds. The registration is deleted only when that value
// equals __kmp_registration_str. All helper strings are then freed, and
// __kmp_registration_flag / __kmp_registration_str are reset.
// NOTE(review): the conditions that select the shm or temp-file path are not
// visible in this listing.
6906void __kmp_unregister_library(
void) {
6908 char *name = __kmp_reg_status_name();
6911#if defined(KMP_USE_SHM)
6912 bool use_shm =
true;
6913 char *shm_name = __kmp_str_format(
"/%s", name);
6914 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
// Fallback: read the registration from the temporary file instead.
6918 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6919 fd1 = open(temp_reg_status_file_name, O_RDONLY);
// Map read-only and copy the stored string out before unmapping.
6925 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6926 if (data1 != MAP_FAILED) {
6927 value = __kmp_str_format(
"%s", data1);
6928 munmap(data1, SHM_SIZE);
6932 value = __kmp_env_get(name);
6935 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6936 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
// Delete the registration only if it is still ours.
6937 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6939#if defined(KMP_USE_SHM)
6941 shm_unlink(shm_name);
6943 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6944 unlink(temp_reg_status_file_name);
6947 __kmp_env_unset(name);
6951#if defined(KMP_USE_SHM)
6952 KMP_INTERNAL_FREE(shm_name);
6954 KMP_DEBUG_ASSERT(temp_reg_status_file_name);
6955 KMP_INTERNAL_FREE(temp_reg_status_file_name);
6959 KMP_INTERNAL_FREE(__kmp_registration_str);
6960 KMP_INTERNAL_FREE(value);
6961 KMP_INTERNAL_FREE(name);
// Mark the library as unregistered.
6963 __kmp_registration_flag = 0;
6964 __kmp_registration_str = NULL;
6971#if KMP_MIC_SUPPORTED
// Queries the processor with __kmp_x86_cpuid(1, 0, ...) and classifies it by
// masking the returned EAX value; the result is stored in __kmp_mic_type.
6973static void __kmp_check_mic_type() {
6974 kmp_cpuid_t cpuid_state = {0};
6975 kmp_cpuid_t *cs_p = &cpuid_state;
6976 __kmp_x86_cpuid(1, 0, cs_p);
  // EAX & 0xff0 == 0xB10 selects mic2.
6978 if ((cs_p->eax & 0xff0) == 0xB10) {
6979 __kmp_mic_type = mic2;
6980 }
else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
  // EAX & 0xf0ff0 == 0x50670 selects mic3.
6981 __kmp_mic_type = mic3;
  // NOTE(review): presumably the final else branch (delimiter not visible here).
6983 __kmp_mic_type = non_mic;
// Sets the user-level wait feature flags from __kmp_x86_cpuid(7, 0, ...):
// bit 5 of ECX becomes __kmp_waitpkg_enabled, which in turn gates
// __kmp_umwait_enabled (with __kmp_user_level_mwait) and __kmp_tpause_enabled
// (with __kmp_tpause_state > 0).
6990static void __kmp_user_level_mwait_init() {
6991 struct kmp_cpuid buf;
6992 __kmp_x86_cpuid(7, 0, &buf);
6993 __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
6994 __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
6995 __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
6996 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
6997 __kmp_umwait_enabled));
7000#ifndef AT_INTELPHIUSERMWAIT
7003#define AT_INTELPHIUSERMWAIT 10000
// getauxval is declared with KMP_WEAK_ATTRIBUTE_EXTERNAL and given a local
// definition that always returns 0.
// NOTE(review): presumably a fallback for C libraries that do not provide
// getauxval, so a real definition overrides this one -- confirm.
7008unsigned long getauxval(
unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
7009unsigned long getauxval(
unsigned long) {
return 0; }
// Decides whether __kmp_mwait_enabled is set. On a mic3 processor the
// AT_INTELPHIUSERMWAIT auxiliary-vector entry is consulted; mwait is enabled
// when its low bit is set or when __kmp_user_level_mwait is set.
7011static void __kmp_user_level_mwait_init() {
7016 if (__kmp_mic_type == mic3) {
7017 unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
7018 if ((res & 0x1) || __kmp_user_level_mwait) {
7019 __kmp_mwait_enabled = TRUE;
  // An informational message is emitted when the user setting is in effect.
7020 if (__kmp_user_level_mwait) {
7021 KMP_INFORM(EnvMwaitWarn);
  // NOTE(review): presumably the else branch of the enable test above.
7024 __kmp_mwait_enabled = FALSE;
7027 KF_TRACE(30, (
"__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
7028 "__kmp_mwait_enabled = %d\n",
7029 __kmp_mic_type, __kmp_mwait_enabled));
// Performs the first ("serial") stage of runtime initialization: sanity
// checks, lock and allocator setup, library registration, default values for
// the global settings, environment processing, allocation of the
// __kmp_threads / __kmp_root arrays, registration of the initial root thread,
// and installation of exit and signal handling. Sets __kmp_init_serial when
// it is done. Callers visible in this file invoke it with __kmp_initz_lock
// held.
7033static void __kmp_do_serial_initialize(
void) {
7037 KA_TRACE(10, (
"__kmp_do_serial_initialize: enter\n"));
  // Sanity-check the sizes of the fixed-width typedefs the runtime relies on.
7039 KMP_DEBUG_ASSERT(
sizeof(kmp_int32) == 4);
7040 KMP_DEBUG_ASSERT(
sizeof(kmp_uint32) == 4);
7041 KMP_DEBUG_ASSERT(
sizeof(kmp_int64) == 8);
7042 KMP_DEBUG_ASSERT(
sizeof(kmp_uint64) == 8);
7043 KMP_DEBUG_ASSERT(
sizeof(kmp_intptr_t) ==
sizeof(
void *));
7053 __kmp_validate_locks();
7055#if ENABLE_LIBOMPTARGET
7057 __kmp_init_omptarget();
7061 __kmp_init_allocator();
  // Register this copy of the library unless registration was deferred.
7067 if (__kmp_need_register_serial)
7068 __kmp_register_library_startup();
  // g_done still set means the library was shut down earlier and is being
  // initialized again; clear the abort and done flags.
7071 if (TCR_4(__kmp_global.g.g_done)) {
7072 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
7075 __kmp_global.g.g_abort = 0;
7076 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7079#if KMP_USE_ADAPTIVE_LOCKS
7080#if KMP_DEBUG_ADAPTIVE_LOCKS
7081 __kmp_init_speculative_stats();
7084#if KMP_STATS_ENABLED
  // Initialize the global, dispatch, debug, atomic and bootstrap locks.
7087 __kmp_init_lock(&__kmp_global_lock);
7088 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7089 __kmp_init_lock(&__kmp_debug_lock);
7090 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7091 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7092 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7093 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7094 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7095 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7096 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7097 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7098 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7099 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7100 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7101 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7102 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7103 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7104 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7106 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7108 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7112 __kmp_runtime_initialize();
7114#if KMP_MIC_SUPPORTED
7115 __kmp_check_mic_type();
7122 __kmp_abort_delay = 0;
  // Default team-size upper bound: __kmp_xproc clamped to
  // [KMP_MIN_NTH, __kmp_sys_max_nth].
7126 __kmp_dflt_team_nth_ub = __kmp_xproc;
7127 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7128 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7130 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7131 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7133 __kmp_max_nth = __kmp_sys_max_nth;
7134 __kmp_cg_max_nth = __kmp_sys_max_nth;
  // The teams limit starts at __kmp_xproc and is capped by the system maximum.
7135 __kmp_teams_max_nth = __kmp_xproc;
7136 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7137 __kmp_teams_max_nth = __kmp_sys_max_nth;
  // Default blocktime and the monitor wakeup / interval values derived from it.
7142 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7144 __kmp_monitor_wakeups =
7145 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7146 __kmp_bt_intervals =
7147 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7150 __kmp_library = library_throughput;
7152 __kmp_static = kmp_sch_static_balanced;
  // Temporary macros naming the reduction-barrier defaults; they are
  // #undef'ed again after the barrier loop.
7159#if KMP_FAST_REDUCTION_BARRIER
7160#define kmp_reduction_barrier_gather_bb ((int)1)
7161#define kmp_reduction_barrier_release_bb ((int)1)
7162#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7163#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
  // Give every barrier type the default branch bits and patterns; the
  // reduction barrier is then overridden with the values defined above.
7165 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7166 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7167 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7168 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7169 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7170#if KMP_FAST_REDUCTION_BARRIER
7171 if (i == bs_reduction_barrier) {
7173 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7174 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7175 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7176 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7180#if KMP_FAST_REDUCTION_BARRIER
7181#undef kmp_reduction_barrier_release_pat
7182#undef kmp_reduction_barrier_gather_pat
7183#undef kmp_reduction_barrier_release_bb
7184#undef kmp_reduction_barrier_gather_bb
  // mic2 processors get their own barrier tuning (hierarchical patterns).
7186#if KMP_MIC_SUPPORTED
7187 if (__kmp_mic_type == mic2) {
7189 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7190 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7192 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7193 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7195#if KMP_FAST_REDUCTION_BARRIER
7196 if (__kmp_mic_type == mic2) {
7197 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7198 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
  // NOTE(review): the two assignments below are presumably alternatives under
  // a conditional-compilation switch that is not visible here -- confirm.
7205 __kmp_env_checks = TRUE;
7207 __kmp_env_checks = FALSE;
7211 __kmp_foreign_tp = TRUE;
7213 __kmp_global.g.g_dynamic = FALSE;
7214 __kmp_global.g.g_dynamic_mode = dynamic_default;
7216 __kmp_init_nesting_mode();
  // Process the environment; this runs after the defaults above were set.
7218 __kmp_env_initialize(NULL);
7220#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7221 __kmp_user_level_mwait_init();
  // When KMP_DUMP_CATALOG is set to a true value, print the message catalog.
7225 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
7226 if (__kmp_str_match_true(val)) {
7227 kmp_str_buf_t buffer;
7228 __kmp_str_buf_init(&buffer);
7229 __kmp_i18n_dump_catalog(&buffer);
7230 __kmp_printf(
"%s", buffer.str);
7231 __kmp_str_buf_free(&buffer);
7233 __kmp_env_free(&val);
  // Capacities are computed from the default team-size upper bound.
7236 __kmp_threads_capacity =
7237 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7239 __kmp_tp_capacity = __kmp_default_tp_capacity(
7240 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
  // The thread and team pools are expected to be empty; reset them anyway.
7245 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7246 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7247 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7248 __kmp_thread_pool = NULL;
7249 __kmp_thread_pool_insert_pt = NULL;
7250 __kmp_team_pool = NULL;
7257 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * __kmp_threads_capacity +
  // A single allocation holds both arrays: __kmp_root begins right after the
  // __kmp_threads_capacity thread pointers.
7259 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
7260 __kmp_root = (kmp_root_t **)((
char *)__kmp_threads +
7261 sizeof(kmp_info_t *) * __kmp_threads_capacity);
7264 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7266 KMP_DEBUG_ASSERT(__kmp_nth == 0);
  // Register the calling thread as a root; the asserts require the returned
  // gtid to be both an uber gtid and the initial gtid.
7271 gtid = __kmp_register_root(TRUE);
7272 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7273 KMP_ASSERT(KMP_UBER_GTID(gtid));
7274 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7278 __kmp_common_initialize();
7282 __kmp_register_atfork();
  // For the configurations selected below, shutdown is hooked through
  // atexit(); a failure to register the handler is fatal.
7285#if !KMP_DYNAMIC_LIB || \
7286 ((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)
7291 int rc = atexit(__kmp_internal_end_atexit);
7293 __kmp_fatal(KMP_MSG(FunctionError,
"atexit()"), KMP_ERR(rc),
7299#if KMP_HANDLE_SIGNALS
  // NOTE(review): the two calls below are presumably per-platform
  // alternatives; the selecting directives are not visible here.
7305 __kmp_install_signals(FALSE);
7308 __kmp_install_signals(TRUE);
7313 __kmp_init_counter++;
  // From here on the serial stage counts as initialized.
7315 __kmp_init_serial = TRUE;
7317 if (__kmp_settings) {
7321 if (__kmp_display_env || __kmp_display_env_verbose) {
7322 __kmp_env_print_2();
7331 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
// Public entry point for serial initialization. __kmp_init_serial is tested
// once without the lock and again after acquiring __kmp_initz_lock, so that
// __kmp_do_serial_initialize() runs under the lock and only while the flag is
// still clear.
7334void __kmp_serial_initialize(
void) {
  // Unlocked check of the flag.
7335 if (__kmp_init_serial) {
7338 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  // Re-check under the lock: another thread may have finished meanwhile.
7339 if (__kmp_init_serial) {
7340 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7343 __kmp_do_serial_initialize();
7344 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
// Performs the second ("middle") stage of initialization: runs the serial
// stage if it has not happened yet, initializes affinity, settles the default
// team size (__kmp_dflt_team_nth) and propagates it to already registered
// threads, then sets __kmp_init_middle.
7347static void __kmp_do_middle_initialize(
void) {
7349 int prev_dflt_team_nth;
7351 if (!__kmp_init_serial) {
7352 __kmp_do_serial_initialize();
7355 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
  // Library registration that was skipped during serial initialization is
  // performed here.
7357 if (UNLIKELY(!__kmp_need_register_serial)) {
7360 __kmp_register_library_startup();
  // Remember the current default so a change can be detected further down.
7365 prev_dflt_team_nth = __kmp_dflt_team_nth;
7367#if KMP_AFFINITY_SUPPORTED
7370 __kmp_affinity_initialize(__kmp_affinity);
7374 KMP_ASSERT(__kmp_xproc > 0);
  // Fall back to the total processor count when no available count is set.
7375 if (__kmp_avail_proc == 0) {
7376 __kmp_avail_proc = __kmp_xproc;
  // Fill the leading zero entries of the nested nthreads list; the same value
  // is also stored into __kmp_dflt_team_nth and __kmp_dflt_team_nth_ub.
7382 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7383 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
  // No default team size yet: pick one (core count or available processors,
  // depending on KMP_DFLT_NTH_CORES).
7388 if (__kmp_dflt_team_nth == 0) {
7389#ifdef KMP_DFLT_NTH_CORES
7391 __kmp_dflt_team_nth = __kmp_ncores;
7392 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7393 "__kmp_ncores (%d)\n",
7394 __kmp_dflt_team_nth));
7397 __kmp_dflt_team_nth = __kmp_avail_proc;
7398 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7399 "__kmp_avail_proc(%d)\n",
7400 __kmp_dflt_team_nth));
  // Clamp the default team size to [KMP_MIN_NTH, __kmp_sys_max_nth].
7404 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7405 __kmp_dflt_team_nth = KMP_MIN_NTH;
7407 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7408 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7411 if (__kmp_nesting_mode > 0)
7412 __kmp_set_nesting_mode_threads();
7416 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
  // If the default changed, push the new value into the nproc ICV of the
  // registered threads; threads whose nproc ICV is already non-zero are
  // tested separately (the action taken for them is not visible here).
7418 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7423 for (i = 0; i < __kmp_threads_capacity; i++) {
7424 kmp_info_t *thread = __kmp_threads[i];
7427 if (thread->th.th_current_task->td_icvs.nproc != 0)
7430 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7435 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7436 __kmp_dflt_team_nth));
7438#ifdef KMP_ADJUST_BLOCKTIME
  // Unless blocktime came from the environment, force zero blocktime when
  // there are more threads than available processors.
7440 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7441 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7442 if (__kmp_nth > __kmp_avail_proc) {
7443 __kmp_zero_bt = TRUE;
  // Mark the middle stage as complete.
7449 TCW_SYNC_4(__kmp_init_middle, TRUE);
7451 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
// Public entry point for middle initialization. __kmp_init_middle is tested
// before and after acquiring __kmp_initz_lock, and
// __kmp_do_middle_initialize() is called with the lock held.
7454void __kmp_middle_initialize(
void) {
  // Unlocked check of the flag.
7455 if (__kmp_init_middle) {
7458 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  // Re-check under the lock.
7459 if (__kmp_init_middle) {
7460 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7463 __kmp_do_middle_initialize();
7464 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
// Performs the final ("parallel") stage of initialization under
// __kmp_initz_lock: ensures the middle stage has run, captures the x87/MXCSR
// control state on x86, installs signal handlers, initializes suspend
// support, resolves the default dynamic mode and sets __kmp_init_parallel.
7467void __kmp_parallel_initialize(
void) {
7468 int gtid = __kmp_entry_gtid();
  // Check the flag without the lock, then again with the lock held.
7471 if (TCR_4(__kmp_init_parallel))
7473 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7474 if (TCR_4(__kmp_init_parallel)) {
7475 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  // Initialization during shutdown is not supported: the thread is parked in
  // __kmp_infinite_loop().
7480 if (TCR_4(__kmp_global.g.g_done)) {
7483 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
7484 __kmp_infinite_loop();
7490 if (!__kmp_init_middle) {
7491 __kmp_do_middle_initialize();
7493 __kmp_assign_root_init_mask();
7494 __kmp_resume_if_hard_paused();
7497 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7498 KMP_ASSERT(KMP_UBER_GTID(gtid));
7500#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  // Save the current x87 control word and MXCSR (masked) into the
  // __kmp_init_* globals.
7503 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7504 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7505 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7509#if KMP_HANDLE_SIGNALS
7511 __kmp_install_signals(TRUE);
7515 __kmp_suspend_initialize();
  // Replace dynamic_default with a concrete mode: load balance when
  // USE_LOAD_BALANCE is defined, thread limit otherwise.
7517#if defined(USE_LOAD_BALANCE)
7518 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7519 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7522 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7523 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7527 if (__kmp_version) {
7528 __kmp_print_version_2();
  // Mark the parallel stage as complete before releasing the lock.
7532 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7535 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
7537 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
// Initializes the hidden helper threads. Parallel initialization is completed
// first (outside the lock); then, under __kmp_initz_lock, the helper affinity
// is set up, the helper threads are started and waited for, and
// __kmp_init_hidden_helper is set.
7540void __kmp_hidden_helper_initialize() {
7541 if (TCR_4(__kmp_init_hidden_helper))
  // This call happens before __kmp_initz_lock is acquired below.
7545 if (!TCR_4(__kmp_init_parallel))
7546 __kmp_parallel_initialize();
7550 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  // Re-check under the lock.
7551 if (TCR_4(__kmp_init_hidden_helper)) {
7552 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7556#if KMP_AFFINITY_SUPPORTED
7560 if (!__kmp_hh_affinity.flags.initialized)
7561 __kmp_affinity_initialize(__kmp_hh_affinity);
  // Reset the count of unexecuted hidden helper tasks.
7565 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7569 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7572 __kmp_do_initialize_hidden_helper_threads();
7575 __kmp_hidden_helper_threads_initz_wait();
7578 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7580 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
// Resets per-thread state before a thread runs its part of a parallel region:
// clears this_construct, resets the dispatch buffer indices and, when
// consistency checking is enabled, pushes a parallel construct for the team.
7585void __kmp_run_before_invoked_task(
int gtid,
int tid, kmp_info_t *this_thr,
7587 kmp_disp_t *dispatch;
7592 this_thr->th.th_local.this_construct = 0;
7594 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7596 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7597 KMP_DEBUG_ASSERT(dispatch);
7598 KMP_DEBUG_ASSERT(team->t.t_dispatch);
  // Restart dispatch buffer numbering at index 0.
7602 dispatch->th_disp_index = 0;
7603 dispatch->th_doacross_buf_idx = 0;
7604 if (__kmp_env_consistency_check)
7605 __kmp_push_parallel(gtid, team->t.t_ident);
// Counterpart of __kmp_run_before_invoked_task: pops the parallel construct
// when consistency checking is enabled and finishes the implicit task of
// this_thr.
7610void __kmp_run_after_invoked_task(
int gtid,
int tid, kmp_info_t *this_thr,
7612 if (__kmp_env_consistency_check)
7613 __kmp_pop_parallel(gtid, team->t.t_ident);
7615 __kmp_finish_implicit_task(this_thr);
// Runs the team's microtask (t_pkfn) on the thread identified by gtid. The
// call is bracketed by __kmp_run_before_invoked_task /
// __kmp_run_after_invoked_task and by optional ITT stack notifications, OMPT
// implicit-task callbacks and statistics timers. rc receives the result of
// __kmp_invoke_microtask.
7618int __kmp_invoke_task_func(
int gtid) {
7620 int tid = __kmp_tid_from_gtid(gtid);
7621 kmp_info_t *this_thr = __kmp_threads[gtid];
7622 kmp_team_t *team = this_thr->th.th_team;
7624 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
  // ITT: report callee entry using the team's stack id, or the parent team's
  // when this team has none.
7626 if (__itt_stack_caller_create_ptr) {
7628 if (team->t.t_stack_id != NULL) {
7629 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7631 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7632 __kmp_itt_stack_callee_enter(
7633 (__itt_caller)team->t.t_parent->t.t_stack_id);
7637#if INCLUDE_SSC_MARKS
7638 SSC_MARK_INVOKING();
7643 void **exit_frame_p;
7644 ompt_data_t *my_task_data;
7645 ompt_data_t *my_parallel_data;
  // With OMPT enabled the exit frame pointer lives in the implicit task's
  // info; otherwise a local dummy is used.
7648 if (ompt_enabled.enabled) {
7649 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7650 .ompt_task_info.frame.exit_frame.ptr);
7652 exit_frame_p = &dummy;
7656 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7657 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
  // Report the beginning of the implicit task to the tool.
7658 if (ompt_enabled.ompt_callback_implicit_task) {
7659 ompt_team_size = team->t.t_nproc;
7660 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7661 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7662 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7663 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7667#if KMP_STATS_ENABLED
  // Choose the timer according to whether the thread was in a teams region.
7669 if (previous_state == stats_state_e::TEAMS_REGION) {
7670 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7672 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7674 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
  // Invoke the outlined parallel-region function with the team's arguments.
7677 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7678 tid, (
int)team->t.t_argc, (
void **)team->t.t_argv
  // Clear the exit frame and record that a team parallel region was run.
7685 *exit_frame_p = NULL;
7686 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7689#if KMP_STATS_ENABLED
7690 if (previous_state == stats_state_e::TEAMS_REGION) {
7691 KMP_SET_THREAD_STATE(previous_state);
7693 KMP_POP_PARTITIONED_TIMER();
  // ITT: report callee exit, mirroring the entry notification above.
7697 if (__itt_stack_caller_create_ptr) {
7699 if (team->t.t_stack_id != NULL) {
7700 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7702 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7703 __kmp_itt_stack_callee_leave(
7704 (__itt_caller)team->t.t_parent->t.t_stack_id);
7708 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
// Executed by a thread that leads one team of a teams construct: pushes a new
// contention-group root carrying the thread-limit ICV, then forks and joins
// the parallel region that runs the teams microtask (th_teams_microtask).
7713void __kmp_teams_master(
int gtid) {
7715 kmp_info_t *thr = __kmp_threads[gtid];
7716 kmp_team_t *team = thr->th.th_team;
7717 ident_t *loc = team->t.t_ident;
  // The requested size of the inner parallel region is the stored
  // threads-per-team value.
7718 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7719 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7720 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7721 KA_TRACE(20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7722 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
  // Allocate a contention-group node and link it in front of the thread's
  // existing th_cg_roots list.
7725 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(
sizeof(kmp_cg_root_t));
7728 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7729 tmp->cg_nthreads = 1;
7730 KA_TRACE(100, (
"__kmp_teams_master: Thread %p created node %p and init"
7731 " cg_nthreads to 1\n",
7733 tmp->up = thr->th.th_cg_roots;
7734 thr->th.th_cg_roots = tmp;
7738#if INCLUDE_SSC_MARKS
7741 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7742 (microtask_t)thr->th.th_teams_microtask,
7743 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7744#if INCLUDE_SSC_MARKS
  // If fewer threads were obtained than requested, record the actual size.
7748 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7749 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7752 __kmp_join_call(loc, gtid
// Invocation wrapper that runs __kmp_teams_master for the thread gtid,
// bracketed by __kmp_run_before_invoked_task / __kmp_run_after_invoked_task
// (both called with tid 0) and by the OMPT implicit-task begin callback.
7761int __kmp_invoke_teams_master(
int gtid) {
7762 kmp_info_t *this_thr = __kmp_threads[gtid];
7763 kmp_team_t *team = this_thr->th.th_team;
  // For a non-serialized team the team's entry point must be
  // __kmp_teams_master.
7765 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7766 KMP_DEBUG_ASSERT((
void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7767 (
void *)__kmp_teams_master);
7769 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7771 int tid = __kmp_tid_from_gtid(gtid);
7772 ompt_data_t *task_data =
7773 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7774 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7775 if (ompt_enabled.ompt_callback_implicit_task) {
7776 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7777 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7779 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7782 __kmp_teams_master(gtid);
  // Record that a league (teams) region was executed.
7784 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7786 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
// Stores a requested thread count in the th_set_nproc field of thread gtid.
// Values that are zero or negative are ignored and leave the field unchanged.
7795void __kmp_push_num_threads(
ident_t *
id,
int gtid,
int num_threads) {
7796 kmp_info_t *thr = __kmp_threads[gtid];
7798 if (num_threads > 0)
7799 thr->th.th_set_nproc = num_threads;
// Computes the number of threads per team for a teams construct and stores it
// in thr->th.th_teams_size.nth. num_threads == 0 means no explicit request: a
// default is derived and silently capped. A non-zero request is also written
// to the thread-limit ICV and may produce a warning when it has to be reduced.
7802static void __kmp_push_thread_limit(kmp_info_t *thr,
int num_teams,
7804 KMP_DEBUG_ASSERT(thr);
  // Middle initialization must have run so the globals used below are set.
7806 if (!TCR_4(__kmp_init_middle))
7807 __kmp_middle_initialize();
7808 __kmp_assign_root_init_mask();
7809 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7810 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
  // No explicit request: start from __kmp_teams_thread_limit or an even split
  // of the available processors across the teams.
7812 if (num_threads == 0) {
7813 if (__kmp_teams_thread_limit > 0) {
7814 num_threads = __kmp_teams_thread_limit;
7816 num_threads = __kmp_avail_proc / num_teams;
  // Cap by the default team size, by the thread-limit ICV, and so that
  // num_teams * num_threads stays within __kmp_teams_max_nth.
7821 if (num_threads > __kmp_dflt_team_nth) {
7822 num_threads = __kmp_dflt_team_nth;
7824 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7825 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7827 if (num_teams * num_threads > __kmp_teams_max_nth) {
7828 num_threads = __kmp_teams_max_nth / num_teams;
7830 if (num_threads == 0) {
  // Explicit request: a negative value triggers a warning.
7834 if (num_threads < 0) {
7835 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
  // The request becomes the thread-limit ICV of the current task.
7841 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7843 if (num_threads > __kmp_dflt_team_nth) {
7844 num_threads = __kmp_dflt_team_nth;
7846 if (num_teams * num_threads > __kmp_teams_max_nth) {
7847 int new_threads = __kmp_teams_max_nth / num_teams;
7848 if (new_threads == 0) {
  // Warn only once (guarded by __kmp_reserve_warn) when the request is cut.
7851 if (new_threads != num_threads) {
7852 if (!__kmp_reserve_warn) {
7853 __kmp_reserve_warn = 1;
7854 __kmp_msg(kmp_ms_warning,
7855 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7856 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7859 num_threads = new_threads;
7862 thr->th.th_teams_size.nth = num_threads;
// Records the number of teams requested for an upcoming teams construct in
// thread gtid (th_set_nproc and th_teams_size.nteams) and then lets
// __kmp_push_thread_limit compute the threads-per-team value.
7867void __kmp_push_num_teams(
ident_t *
id,
int gtid,
int num_teams,
7869 kmp_info_t *thr = __kmp_threads[gtid];
  // A negative request triggers a warning.
7870 if (num_teams < 0) {
7873 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
  // Zero means "not specified": use __kmp_nteams when it is positive.
7877 if (num_teams == 0) {
7878 if (__kmp_nteams > 0) {
7879 num_teams = __kmp_nteams;
  // Cap at __kmp_teams_max_nth, warning once via __kmp_reserve_warn.
7884 if (num_teams > __kmp_teams_max_nth) {
7885 if (!__kmp_reserve_warn) {
7886 __kmp_reserve_warn = 1;
7887 __kmp_msg(kmp_ms_warning,
7888 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7889 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7891 num_teams = __kmp_teams_max_nth;
7895 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7897 __kmp_push_thread_limit(thr, num_teams, num_threads);
// Variant of __kmp_push_num_teams that takes a lower and an upper bound for
// the number of teams. It selects a value within the bounds, stores it in
// thread gtid and then calls __kmp_push_thread_limit.
7902void __kmp_push_num_teams_51(
ident_t *
id,
int gtid,
int num_teams_lb,
7903 int num_teams_ub,
int num_threads) {
7904 kmp_info_t *thr = __kmp_threads[gtid];
7905 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
7906 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
7907 KMP_DEBUG_ASSERT(num_threads >= 0);
  // Inverted bounds are a fatal error. This is a runtime check, so it is
  // presumably also effective when the debug asserts are compiled out.
7909 if (num_teams_lb > num_teams_ub) {
7910 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
7911 KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
  // Only an upper bound given: treat it as an exact request.
7916 if (num_teams_lb == 0 && num_teams_ub > 0)
7917 num_teams_lb = num_teams_ub;
  // No bounds at all: use __kmp_nteams when positive, capped at
  // __kmp_teams_max_nth with a one-time warning.
7919 if (num_teams_lb == 0 && num_teams_ub == 0) {
7920 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7921 if (num_teams > __kmp_teams_max_nth) {
7922 if (!__kmp_reserve_warn) {
7923 __kmp_reserve_warn = 1;
7924 __kmp_msg(kmp_ms_warning,
7925 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7926 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7928 num_teams = __kmp_teams_max_nth;
7930 }
else if (num_teams_lb == num_teams_ub) {
  // Equal bounds: exactly that many teams.
7931 num_teams = num_teams_ub;
  // A real range. Without a thread count, take the upper bound unless it
  // exceeds __kmp_teams_max_nth, in which case take the lower bound.
7933 if (num_threads <= 0) {
7934 if (num_teams_ub > __kmp_teams_max_nth) {
7935 num_teams = num_teams_lb;
7937 num_teams = num_teams_ub;
  // With a thread count, derive the team count from the thread budget and
  // clamp it into [num_teams_lb, num_teams_ub].
7940 num_teams = (num_threads > __kmp_teams_max_nth)
7942 : __kmp_teams_max_nth / num_threads;
7943 if (num_teams < num_teams_lb) {
7944 num_teams = num_teams_lb;
7945 }
else if (num_teams > num_teams_ub) {
7946 num_teams = num_teams_ub;
7952 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7954 __kmp_push_thread_limit(thr, num_teams, num_threads);
// Stores the requested proc_bind policy in the th_set_proc_bind field of
// thread gtid.
7958void __kmp_push_proc_bind(
ident_t *
id,
int gtid, kmp_proc_bind_t proc_bind) {
7959 kmp_info_t *thr = __kmp_threads[gtid];
7960 thr->th.th_set_proc_bind = proc_bind;
// Called by the primary thread of `team` (asserted via KMP_MASTER_GTID) to
// reset the team's construct and dispatch bookkeeping and then enter the fork
// barrier.
7965void __kmp_internal_fork(
ident_t *
id,
int gtid, kmp_team_t *team) {
7966 kmp_info_t *this_thr = __kmp_threads[gtid];
7972 KMP_DEBUG_ASSERT(team);
7973 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7974 KMP_ASSERT(KMP_MASTER_GTID(gtid));
  // Reset the team's construct counter and ordered-section value.
7977 team->t.t_construct = 0;
7978 team->t.t_ordered.dt.t_value =
  // Re-number the dispatch buffers: all __kmp_dispatch_num_buffers entries
  // when t_max_nproc > 1, otherwise only entry 0.
7982 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7983 if (team->t.t_max_nproc > 1) {
7985 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7986 team->t.t_disp_buffer[i].buffer_index = i;
7987 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7990 team->t.t_disp_buffer[0].buffer_index = 0;
7991 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7995 KMP_ASSERT(this_thr->th.th_team == team);
  // Debug check: every team member exists and agrees on the team size.
7998 for (f = 0; f < team->t.t_nproc; f++) {
7999 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
8000 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
8005 __kmp_fork_barrier(gtid, 0);
// Called by the primary thread of `team` (asserted via KMP_MASTER_GTID) to
// wait in the join barrier. Afterwards, when OMPT is enabled and the thread
// was in the implicit-barrier wait state, the end-of-barrier callbacks are
// delivered.
8008void __kmp_internal_join(
ident_t *
id,
int gtid, kmp_team_t *team) {
8009 kmp_info_t *this_thr = __kmp_threads[gtid];
8011 KMP_DEBUG_ASSERT(team);
8012 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
8013 KMP_ASSERT(KMP_MASTER_GTID(gtid));
  // Diagnostic dump when the thread's idea of the team size disagrees with
  // the team; the assertion that follows then fails in debug builds.
8019 if (__kmp_threads[gtid] &&
8020 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
8021 __kmp_printf(
"GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
8022 __kmp_threads[gtid]);
8023 __kmp_printf(
"__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
8024 "team->t.t_nproc=%d\n",
8025 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
8027 __kmp_print_structure();
8029 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
8030 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
8033 __kmp_join_barrier(gtid);
8035 if (ompt_enabled.enabled &&
8036 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
8037 int ds_tid = this_thr->th.th_info.ds.ds_tid;
8038 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
8039 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  // A code pointer is looked up only for the primary thread and only when one
  // of the sync-region callbacks is registered.
8041 void *codeptr = NULL;
8042 if (KMP_MASTER_TID(ds_tid) &&
8043 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
8044 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
8045 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
8047 if (ompt_enabled.ompt_callback_sync_region_wait) {
8048 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
8049 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
8052 if (ompt_enabled.ompt_callback_sync_region) {
8053 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
8054 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
  // Threads other than the primary also report the end of their implicit task.
8058 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
8059 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
8060 ompt_scope_end, NULL, task_data, 0, ds_tid,
8061 ompt_task_implicit);
8067 KMP_ASSERT(this_thr->th.th_team == team);
8072#ifdef USE_LOAD_BALANCE
// Counts threads of the root's hot team that are currently active, starting
// at index 1 (index 0 is not examined). With blocktime at KMP_MAX_BLOCKTIME
// the answer is t_nproc - 1 without inspecting the threads.
// NOTE(review): the result for an active root (r_active) is on a line not
// visible here.
8076static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
8079 kmp_team_t *hot_team;
8081 if (root->r.r_active) {
8084 hot_team = root->r.r_hot_team;
8085 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
8086 return hot_team->t.t_nproc - 1;
8091 for (i = 1; i < hot_team->t.t_nproc; i++) {
8092 if (hot_team->t.t_threads[i]->th.th_active) {
// Chooses a team size for a parallel region in load-balance dynamic mode,
// based on the system load reported by __kmp_get_load_balance and on this
// process's own active threads. If the load cannot be obtained the dynamic
// mode is switched to dynamic_thread_limit and the thread-limit formula is
// used. The result is kept at or above KMP_MIN_NTH.
8101static int __kmp_load_balance_nproc(kmp_root_t *root,
int set_nproc) {
8104 int hot_team_active;
8105 int team_curr_active;
8108 KB_TRACE(20, (
"__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
8110 KMP_DEBUG_ASSERT(root);
8111 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8112 ->th.th_current_task->td_icvs.dynamic == TRUE);
8113 KMP_DEBUG_ASSERT(set_nproc > 1);
  // set_nproc == 1 is still handled at runtime, although the debug assert
  // above rules it out.
8115 if (set_nproc == 1) {
8116 KB_TRACE(20, (
"__kmp_load_balance_nproc: serial execution.\n"));
  // Threads of this process considered active: pool threads, active hot-team
  // threads, plus one.
8125 pool_active = __kmp_thread_pool_active_nth;
8126 hot_team_active = __kmp_active_hot_team_nproc(root);
8127 team_curr_active = pool_active + hot_team_active + 1;
8130 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8131 KB_TRACE(30, (
"__kmp_load_balance_nproc: system active = %d pool active = %d "
8132 "hot team active = %d\n",
8133 system_active, pool_active, hot_team_active));
  // A negative load means the query failed: switch modes permanently, warn,
  // and compute the size with the thread-limit formula.
8135 if (system_active < 0) {
8139 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8140 KMP_WARNING(CantLoadBalUsing,
"KMP_DYNAMIC_MODE=thread limit");
8143 retval = __kmp_avail_proc - __kmp_nth +
8144 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8145 if (retval > set_nproc) {
8148 if (retval < KMP_MIN_NTH) {
8149 retval = KMP_MIN_NTH;
8152 KB_TRACE(20, (
"__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
  // The system load cannot be lower than this process's own active threads.
8160 if (system_active < team_curr_active) {
8161 system_active = team_curr_active;
  // Free processors = available - (system load caused by other processes).
8163 retval = __kmp_avail_proc - system_active + team_curr_active;
8164 if (retval > set_nproc) {
8167 if (retval < KMP_MIN_NTH) {
8168 retval = KMP_MIN_NTH;
8171 KB_TRACE(20, (
"__kmp_load_balance_nproc: exit. retval:%d\n", retval));
// Tears down the runtime's global state in reverse order of initialization
// (parallel, middle, serial), then frees the root and thread arrays, old
// thread lists, user locks, nested-setting arrays and other global buffers,
// resetting the corresponding globals.
8180void __kmp_cleanup(
void) {
8183 KA_TRACE(10, (
"__kmp_cleanup: enter\n"));
  // Undo the parallel stage.
8185 if (TCR_4(__kmp_init_parallel)) {
8186#if KMP_HANDLE_SIGNALS
8187 __kmp_remove_signals();
8189 TCW_4(__kmp_init_parallel, FALSE);
  // Undo the middle stage.
8192 if (TCR_4(__kmp_init_middle)) {
8193#if KMP_AFFINITY_SUPPORTED
8194 __kmp_affinity_uninitialize();
8196 __kmp_cleanup_hierarchy();
8197 TCW_4(__kmp_init_middle, FALSE);
8200 KA_TRACE(10, (
"__kmp_cleanup: go serial cleanup\n"));
  // Undo the serial stage.
8202 if (__kmp_init_serial) {
8203 __kmp_runtime_destroy();
8204 __kmp_init_serial = FALSE;
8207 __kmp_cleanup_threadprivate_caches();
  // Free each remaining root, then the __kmp_threads block itself.
8209 for (f = 0; f < __kmp_threads_capacity; f++) {
8210 if (__kmp_root[f] != NULL) {
8211 __kmp_free(__kmp_root[f]);
8212 __kmp_root[f] = NULL;
8215 __kmp_free(__kmp_threads);
8218 __kmp_threads = NULL;
8220 __kmp_threads_capacity = 0;
  // Walk __kmp_old_threads_list and free the thread arrays it holds.
8223 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8225 kmp_old_threads_list_t *next = ptr->next;
8226 __kmp_free(ptr->threads);
8231#if KMP_USE_DYNAMIC_LOCK
8232 __kmp_cleanup_indirect_user_locks();
8234 __kmp_cleanup_user_locks();
8238 __kmp_free(ompd_env_block);
8239 ompd_env_block = NULL;
8240 ompd_env_block_size = 0;
8244#if KMP_AFFINITY_SUPPORTED
8245 KMP_INTERNAL_FREE(CCAST(
char *, __kmp_cpuinfo_file));
8246 __kmp_cpuinfo_file = NULL;
8249#if KMP_USE_ADAPTIVE_LOCKS
8250#if KMP_DEBUG_ADAPTIVE_LOCKS
8251 __kmp_print_speculative_stats();
  // Release the nested nthreads / proc-bind arrays and the affinity format.
8254 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
8255 __kmp_nested_nth.nth = NULL;
8256 __kmp_nested_nth.size = 0;
8257 __kmp_nested_nth.used = 0;
8258 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
8259 __kmp_nested_proc_bind.bind_types = NULL;
8260 __kmp_nested_proc_bind.size = 0;
8261 __kmp_nested_proc_bind.used = 0;
8262 if (__kmp_affinity_format) {
8263 KMP_INTERNAL_FREE(__kmp_affinity_format);
8264 __kmp_affinity_format = NULL;
8267 __kmp_i18n_catclose();
8269#if KMP_USE_HIER_SCHED
8270 __kmp_hier_scheds.deallocate();
8273#if KMP_STATS_ENABLED
8277 KA_TRACE(10, (
"__kmp_cleanup: exit\n"));
// Consults the KMP_IGNORE_MPPBEG environment variable (read with getenv) and
// tests it with __kmp_str_match_false.
// NOTE(review): the return statements are not visible in this view.
8282int __kmp_ignore_mppbeg(
void) {
8285 if ((env = getenv(
"KMP_IGNORE_MPPBEG")) != NULL) {
8286 if (__kmp_str_match_false(env))
// Consults the KMP_IGNORE_MPPEND environment variable (read with getenv) and
// tests it with __kmp_str_match_false.
// NOTE(review): the return statements are not visible in this view.
8293int __kmp_ignore_mppend(
void) {
8296 if ((env = getenv(
"KMP_IGNORE_MPPEND")) != NULL) {
8297 if (__kmp_str_match_false(env))
// Marks the calling thread's root as "begun" (r_begin = TRUE). The flag is
// tested before and after taking r_begin_lock, and it is set with the lock
// held.
8304void __kmp_internal_begin(
void) {
8310 gtid = __kmp_entry_gtid();
8311 root = __kmp_threads[gtid]->th.th_root;
8312 KMP_ASSERT(KMP_UBER_GTID(gtid));
  // Unlocked check of the flag.
8314 if (root->r.r_begin)
8316 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  // Re-check under the lock.
8317 if (root->r.r_begin) {
8318 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8322 root->r.r_begin = TRUE;
8324 __kmp_release_lock(&root->r.r_begin_lock, gtid);
// Handles a user request to change the library mode for the calling thread.
// A warning is issued if the root is inside a parallel region. For each known
// mode the pending th_set_nproc is cleared and the nproc ICV is set (1 for
// serial; the default team size, or its upper bound when that is still 0, for
// turnaround and throughput). An unknown mode is fatal. Finally the mode is
// applied globally through __kmp_aux_set_library.
8329void __kmp_user_set_library(
enum library_type arg) {
8336 gtid = __kmp_entry_gtid();
8337 thread = __kmp_threads[gtid];
8339 root = thread->th.th_root;
8341 KA_TRACE(20, (
"__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
8343 if (root->r.r_in_parallel) {
8345 KMP_WARNING(SetLibraryIncorrectCall);
8350 case library_serial:
8351 thread->th.th_set_nproc = 0;
8352 set__nproc(thread, 1);
8354 case library_turnaround:
8355 thread->th.th_set_nproc = 0;
8356 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8357 : __kmp_dflt_team_nth_ub);
8359 case library_throughput:
8360 thread->th.th_set_nproc = 0;
8361 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8362 : __kmp_dflt_team_nth_ub);
8365 KMP_FATAL(UnknownLibraryType, arg);
8368 __kmp_aux_set_library(arg);
// Sets the stack size for worker threads (__kmp_stksize). The change is
// applied under __kmp_initz_lock and only while the parallel stage is not yet
// initialized; the value is clamped to
// [__kmp_sys_min_stksize, KMP_MAX_STKSIZE].
8371void __kmp_aux_set_stacksize(
size_t arg) {
8372 if (!__kmp_init_serial)
8373 __kmp_serial_initialize();
  // A request that is not a multiple of 0x1000 is first truncated to one.
8376 if (arg & (0x1000 - 1)) {
8377 arg &= ~(0x1000 - 1);
8382 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8385 if (!TCR_4(__kmp_init_parallel)) {
8388 if (value < __kmp_sys_min_stksize)
8389 value = __kmp_sys_min_stksize;
8390 else if (value > KMP_MAX_STKSIZE)
8391 value = KMP_MAX_STKSIZE;
8393 __kmp_stksize = value;
  // Record that the stack size was set explicitly.
8395 __kmp_env_stksize = TRUE;
8398 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
// Stores the library mode in __kmp_library and applies mode-specific side
// effects: an informational message for serial, a yield-setting adjustment
// for turnaround, and a blocktime reset for throughput. An unknown mode is
// fatal.
8403void __kmp_aux_set_library(
enum library_type arg) {
8404 __kmp_library = arg;
8406 switch (__kmp_library) {
8407 case library_serial: {
8408 KMP_INFORM(LibraryIsSerial);
8410 case library_turnaround:
  // Switch __kmp_use_yield from 1 to 2 unless it was set explicitly.
8411 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
8412 __kmp_use_yield = 2;
8414 case library_throughput:
  // Throughput mode replaces a maximal blocktime with the default.
8415 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
8416 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
8419 KMP_FATAL(UnknownLibraryType, arg);
// Walks up the team hierarchy of the calling thread towards the level just
// above th_teams_level and reports a serialization count through the
// reference parameter.
//   teams_serialized (out): reset to 0 on entry; inside the teams branch it is
//     loaded from t_serialized of the team under inspection and counted down
//     by the inner loop.
// The walk only happens when th_teams_microtask is set on the thread.
// NOTE(review): the loop tails, the return statements and the closing braces
// are missing from this excerpt, so the exact returned team (and the result
// when th_teams_microtask is not set) cannot be confirmed from here.
8425static kmp_team_t *__kmp_aux_get_team_info(
int &teams_serialized) {
8426 kmp_info_t *thr = __kmp_entry_thread();
8427 teams_serialized = 0;
8428 if (thr->th.th_teams_microtask) {
8429 kmp_team_t *team = thr->th.th_team;
8430 int tlevel = thr->th.th_teams_level;
8431 int ii = team->t.t_level;
8432 teams_serialized = team->t.t_serialized;
  // Target level is one above the recorded teams level.
8433 int level = tlevel + 1;
8434 KMP_DEBUG_ASSERT(ii >= tlevel);
8435 while (ii > level) {
  // Consume serialized nesting levels of the current team one at a time.
8436 for (teams_serialized = team->t.t_serialized;
8437 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
  // All serialized levels of this team were consumed: move to the parent.
8439 if (team->t.t_serialized && (!teams_serialized)) {
8440 team = team->t.t_parent;
8444 team = team->t.t_parent;
// Returns t_master_tid of the team found by __kmp_aux_get_team_info().
// NOTE(review): the declaration of serialized, the body of the serialized > 1
// branch and any null check on team are missing from this excerpt.
8453int __kmp_aux_get_team_num() {
8455 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8457 if (serialized > 1) {
8460 return team->t.t_master_tid;
// Returns t_nproc of the parent of the team found by
// __kmp_aux_get_team_info().
// NOTE(review): the declaration of serialized, the body of the serialized > 1
// branch and any null check on team are missing from this excerpt.
8466int __kmp_aux_get_num_teams() {
8468 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8470 if (serialized > 1) {
8473 return team->t.t_parent->t.t_nproc;
// One row of the affinity format lookup table.
// NOTE(review): only long_name is visible in this excerpt; code elsewhere in
// the file also reads members named short_name and field_format, whose
// declarations are missing here.
8512typedef struct kmp_affinity_format_field_t {
  // Long spelling of the field name, matched with strncmp by the parser.
8514 const char *long_name;
8517} kmp_affinity_format_field_t;
// Lookup table of supported affinity format fields. Each row holds a
// single-character name, a long name and a printf conversion character
// (s for the string-valued row, d for the integer-valued rows).
// The thread_affinity row is only compiled when KMP_AFFINITY_SUPPORTED is set.
// NOTE(review): the matching #endif and possibly further rows are missing
// from this excerpt.
8519static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8520#if KMP_AFFINITY_SUPPORTED
8521 {
'A',
"thread_affinity",
's'},
8523 {
't',
"team_num",
'd'},
8524 {
'T',
"num_teams",
'd'},
8525 {
'L',
"nesting_level",
'd'},
8526 {
'n',
"thread_num",
'd'},
8527 {
'N',
"num_threads",
'd'},
8528 {
'a',
"ancestor_tnum",
'd'},
8530 {
'P',
"process_id",
'd'},
8531 {
'i',
"native_thread_id",
'd'}};
// Expands a single percent-introduced field of an affinity format string into
// field_buffer.
//   gtid:         global thread id, asserted non-negative.
//   th:           thread descriptor whose team data is printed.
//   field_buffer: output buffer; cleared first, then filled with the field.
//   ptr:          used below as a pointer to the parse cursor, asserted to
//                 point at a percent character on entry.
// Visible stages: justification and zero-pad flags, optional decimal width,
// construction of a printf-style conversion in the local format[] array,
// name lookup in __kmp_affinity_format_table (long name when the field opens
// with a brace, otherwise single character), and a switch on the matched
// short name that prints the value with __kmp_str_buf_print().
// NOTE(review): the ptr parameter declaration, all case labels, break
// statements, cursor advances and the final return are missing from this
// excerpt; rc is presumably the return value - confirm.
8534static int __kmp_aux_capture_affinity_field(
int gtid,
const kmp_info_t *th,
8536 kmp_str_buf_t *field_buffer) {
8537 int rc, format_index, field_value;
8538 const char *width_left, *width_right;
8539 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  // Capacity of the locally built conversion string, checked by the
  // KMP_ASSERT near the end of the function.
8540 static const int FORMAT_SIZE = 20;
8541 char format[FORMAT_SIZE] = {0};
8542 char absolute_short_name = 0;
8544 KMP_DEBUG_ASSERT(gtid >= 0);
8545 KMP_DEBUG_ASSERT(th);
8546 KMP_DEBUG_ASSERT(**ptr ==
'%');
8547 KMP_DEBUG_ASSERT(field_buffer);
8549 __kmp_str_buf_clear(field_buffer);
  // Appends a single percent character to the output (the guarding condition
  // is not visible; presumably the doubled-percent escape - confirm).
8556 __kmp_str_buf_cat(field_buffer,
"%", 1);
8567 right_justify =
false;
8569 right_justify =
true;
  // Optional width: starts when the cursor is on a decimal digit.
8573 width_left = width_right = NULL;
8574 if (**ptr >=
'0' && **ptr <=
'9') {
  // Assemble the conversion: percent, optional minus, optional zero, width.
8582 format[format_index++] =
'%';
8584 format[format_index++] =
'-';
8586 format[format_index++] =
'0';
8587 if (width_left && width_right) {
  // The loop condition caps the copy with i < 8 (the update of i is not
  // visible here).
8591 while (i < 8 && width_left < width_right) {
8592 format[format_index++] = *width_left;
  // Name lookup: an opening brace selects long-name matching.
8600 found_valid_name =
false;
8601 parse_long_name = (**ptr ==
'{');
8602 if (parse_long_name)
8604 for (
size_t i = 0; i <
sizeof(__kmp_affinity_format_table) /
8605 sizeof(__kmp_affinity_format_table[0]);
8607 char short_name = __kmp_affinity_format_table[i].short_name;
8608 const char *long_name = __kmp_affinity_format_table[i].long_name;
8609 char field_format = __kmp_affinity_format_table[i].field_format;
8610 if (parse_long_name) {
  // strncmp over the length of long_name, so this is a prefix comparison
  // against the text at the cursor.
8611 size_t length = KMP_STRLEN(long_name);
8612 if (strncmp(*ptr, long_name, length) == 0) {
8613 found_valid_name =
true;
8616 }
else if (**ptr == short_name) {
8617 found_valid_name =
true;
  // On a match the conversion character and a terminator complete format[].
8620 if (found_valid_name) {
8621 format[format_index++] = field_format;
8622 format[format_index++] =
'\0';
8623 absolute_short_name = short_name;
  // In long-name mode the match can still be rejected by resetting
  // absolute_short_name to 0 (the rejecting condition is not visible).
8627 if (parse_long_name) {
8629 absolute_short_name = 0;
  // Dispatch on the matched short name; case labels are not visible.
8637 switch (absolute_short_name) {
8639 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8642 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8645 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8648 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
  // This case fills a 256-byte stack buffer via __kmp_expand_host_name().
8651 static const int BUFFER_SIZE = 256;
8652 char buf[BUFFER_SIZE];
8653 __kmp_expand_host_name(buf, BUFFER_SIZE);
8654 rc = __kmp_str_buf_print(field_buffer, format, buf);
8657 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8660 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
8663 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
  // Ancestor thread number is queried one level above the current team level.
8667 __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8668 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8670#if KMP_AFFINITY_SUPPORTED
  // Affinity mask case: the mask is rendered into a temporary string buffer
  // that is freed after printing.
8673 __kmp_str_buf_init(&buf);
8674 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8675 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8676 __kmp_str_buf_free(&buf);
  // Fallback output; presumably the default case for unknown names - confirm.
8682 rc = __kmp_str_buf_print(field_buffer,
"%s",
"undefined");
8684 if (parse_long_name) {
8693 KMP_ASSERT(format_index <= FORMAT_SIZE);
// Expands a whole affinity format string for thread gtid into buffer.
//   gtid:   global thread id, asserted non-negative; indexes __kmp_threads.
//   format: user format string.
//   buffer: output string buffer, cleared before use.
// A NULL or empty parse string falls back to the global
// __kmp_affinity_format. Percent-introduced fields are expanded through
// __kmp_aux_capture_affinity_field() into a temporary buffer that is appended
// to the output; other characters are appended one at a time.
// NOTE(review): the assignment of parse_ptr from format, the cursor advance,
// the use of rc and the return statement are missing from this excerpt.
8703size_t __kmp_aux_capture_affinity(
int gtid,
const char *format,
8704 kmp_str_buf_t *buffer) {
8705 const char *parse_ptr;
8707 const kmp_info_t *th;
8708 kmp_str_buf_t field;
8710 KMP_DEBUG_ASSERT(buffer);
8711 KMP_DEBUG_ASSERT(gtid >= 0);
8713 __kmp_str_buf_init(&field);
8714 __kmp_str_buf_clear(buffer);
8716 th = __kmp_threads[gtid];
  // Fall back to the global default format when none was supplied.
8722 if (parse_ptr == NULL || *parse_ptr ==
'\0') {
8723 parse_ptr = __kmp_affinity_format;
8725 KMP_DEBUG_ASSERT(parse_ptr);
8727 while (*parse_ptr !=
'\0') {
8729 if (*parse_ptr ==
'%') {
8731 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8732 __kmp_str_buf_catbuf(buffer, &field);
  // Literal character: copied through unchanged.
8736 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8741 __kmp_str_buf_free(&field);
// Formats the affinity string for thread gtid with
// __kmp_aux_capture_affinity() and writes it, followed by KMP_END_OF_LINE, to
// kmp_out. The temporary string buffer is freed afterwards.
//   gtid:   global thread id passed through to the capture routine.
//   format: format string passed through to the capture routine.
// NOTE(review): the declaration of buf is missing from this excerpt.
8746void __kmp_aux_display_affinity(
int gtid,
const char *format) {
8748 __kmp_str_buf_init(&buf);
8749 __kmp_aux_capture_affinity(gtid, format, &buf);
8750 __kmp_fprintf(kmp_out,
"%s" KMP_END_OF_LINE, buf.str);
8751 __kmp_str_buf_free(&buf);
// Sets the blocktime for a thread, both in its current team slot and in slot
// 0 of its serial team.
//   arg:    requested blocktime; clamped to
//           [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME].
//   thread: thread whose teams are updated.
//   tid:    slot of the thread inside thread->th.th_team.
// Internal controls are saved via __kmp_save_internal_controls() before any
// value is changed.
// NOTE(review): the declarations of bt_intervals and bt_set and the
// preprocessor conditionals around the interval code and the two KF_TRACE
// variants are missing from this excerpt; presumably only one trace variant
// is compiled in a given build - confirm.
8756void __kmp_aux_set_blocktime(
int arg, kmp_info_t *thread,
int tid) {
8757 int blocktime = arg;
8763 __kmp_save_internal_controls(thread);
8766 if (blocktime < KMP_MIN_BLOCKTIME)
8767 blocktime = KMP_MIN_BLOCKTIME;
8768 else if (blocktime > KMP_MAX_BLOCKTIME)
8769 blocktime = KMP_MAX_BLOCKTIME;
8771 set__blocktime_team(thread->th.th_team, tid, blocktime);
8772 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
  // Interval count derived from the blocktime and __kmp_monitor_wakeups.
8776 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8778 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8779 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8785 set__bt_set_team(thread->th.th_team, tid, bt_set);
8786 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8788 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8789 "bt_intervals=%d, monitor_updates=%d\n",
8790 __kmp_gtid_from_tid(tid, thread->th.th_team),
8791 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8792 __kmp_monitor_wakeups));
8794 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8795 __kmp_gtid_from_tid(tid, thread->th.th_team),
8796 thread->th.th_team->t.t_id, tid, blocktime));
// Applies settings given as a string by passing it to __kmp_env_initialize(),
// after making sure serial initialization has run.
//   str: settings string handed to __kmp_env_initialize().
//   len: not referenced by any visible line.
// NOTE(review): the body of the final condition on __kmp_settings,
// __kmp_display_env and __kmp_display_env_verbose is missing from this
// excerpt.
8800void __kmp_aux_set_defaults(
char const *str,
size_t len) {
8801 if (!__kmp_init_serial) {
8802 __kmp_serial_initialize();
8804 __kmp_env_initialize(str);
8806 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
// Chooses the reduction method for a reduction construct.
//   loc:         source location; its flags are tested for
//                KMP_IDENT_ATOMIC_REDUCE.
//   global_tid:  global thread id used to query the team size.
//   num_vars:    number of reduction variables (used by the 32-bit heuristics).
//   reduce_size: size of the reduction data in bytes (used by one 32-bit
//                heuristic).
//   reduce_data, reduce_func: the tree method counts as generated only when
//                both are non-null.
//   lck:         critical-section name, asserted non-null.
// The starting choice is critical_reduce_block, a team of one thread gets
// empty_reduce_block, and otherwise architecture and OS specific heuristics
// pick between the atomic, tree and critical methods. A method forced through
// __kmp_force_reduction_method overrides the heuristic, falling back to
// critical_reduce_block with a warning when the forced method was not
// generated.
// NOTE(review): many lines (declaration of team_size, else branches, #else
// and #endif directives, break statements, the second half of the forcing
// condition and the return) are missing from this excerpt; the summary above
// covers only what is visible.
8814PACKED_REDUCTION_METHOD_T
8815__kmp_determine_reduction_method(
8816 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
size_t reduce_size,
8817 void *reduce_data,
void (*reduce_func)(
void *lhs_data,
void *rhs_data),
8818 kmp_critical_name *lck) {
8829 PACKED_REDUCTION_METHOD_T retval;
8833 KMP_DEBUG_ASSERT(lck);
// Helper predicates; both are undefined again at the end of the function.
8835#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \
8837 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8838#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
  // Default choice when nothing better applies.
8840 retval = critical_reduce_block;
8843 team_size = __kmp_get_team_num_threads(global_tid);
8844 if (team_size == 1) {
8846 retval = empty_reduce_block;
8850 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
// 64-bit architectures.
8852#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8853 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
8855#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8856 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
  // Team-size threshold compared against team_size below; raised from 4 to 8
  // when __kmp_mic_type is not non_mic.
8858 int teamsize_cutoff = 4;
8860#if KMP_MIC_SUPPORTED
8861 if (__kmp_mic_type != non_mic) {
8862 teamsize_cutoff = 8;
8865 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8866 if (tree_available) {
8867 if (team_size <= teamsize_cutoff) {
8868 if (atomic_available) {
8869 retval = atomic_reduce_block;
8872 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8874 }
else if (atomic_available) {
8875 retval = atomic_reduce_block;
8878#error "Unknown or unsupported OS"
// 32-bit architectures.
// NOTE(review): KMP_ARCH_AARCH is not one of the names used in the 64-bit
// list above (which has KMP_ARCH_AARCH64); check whether this spelling is
// intentional.
8882#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
8884#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD
8888 if (atomic_available) {
8889 if (num_vars <= 2) {
8890 retval = atomic_reduce_block;
8896 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8897 if (atomic_available && (num_vars <= 3)) {
8898 retval = atomic_reduce_block;
8899 }
else if (tree_available) {
  // Tree method only for payloads strictly between 9 and 2000 kmp_real64
  // values in size.
8900 if ((reduce_size > (9 *
sizeof(kmp_real64))) &&
8901 (reduce_size < (2000 *
sizeof(kmp_real64)))) {
8902 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8907#error "Unknown or unsupported OS"
8911#error "Unknown or unsupported architecture"
  // Forced method handling (the remainder of this condition is not visible).
8919 if (__kmp_force_reduction_method != reduction_method_not_defined &&
8922 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8924 int atomic_available, tree_available;
8926 switch ((forced_retval = __kmp_force_reduction_method)) {
8927 case critical_reduce_block:
8931 case atomic_reduce_block:
8932 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8933 if (!atomic_available) {
8934 KMP_WARNING(RedMethodNotSupported,
"atomic");
8935 forced_retval = critical_reduce_block;
8939 case tree_reduce_block:
8940 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8941 if (!tree_available) {
8942 KMP_WARNING(RedMethodNotSupported,
"tree");
8943 forced_retval = critical_reduce_block;
8945#if KMP_FAST_REDUCTION_BARRIER
8946 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8955 retval = forced_retval;
8958 KA_TRACE(10, (
"reduction method selected=%08x\n", retval));
8960#undef FAST_REDUCTION_TREE_METHOD_GENERATED
8961#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
// Returns the packed_reduction_method stored in the thread-local data of the
// calling thread, shifted right by 8 bits (the low 8 bits are dropped).
8966kmp_int32 __kmp_get_reduce_method(
void) {
8967 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
// Sets the global __kmp_pause_status to kmp_soft_paused; nothing else is done
// here.
8972void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
// Sets the global __kmp_pause_status to kmp_hard_paused and then calls
// __kmp_internal_end_thread() with -1 as the gtid argument.
8976void __kmp_hard_pause() {
8977 __kmp_pause_status = kmp_hard_paused;
8978 __kmp_internal_end_thread(-1);
// Leaves the soft-paused state: when __kmp_pause_status is kmp_soft_paused it
// is reset to kmp_not_paused and every thread slot from gtid 1 up to
// __kmp_threads_capacity is visited. For each thread a kmp_flag_64 is built
// on the b_go field of its fork/join barrier data and tested with
// is_sleeping(); otherwise the suspend mutex of the thread is try-locked and
// unlocked again.
// NOTE(review): null checks on the slot, the flag constructor arguments, the
// resume calls and the surrounding loop structure are missing from this
// excerpt; presumably sleeping threads are resumed - confirm.
8982void __kmp_resume_if_soft_paused() {
8983 if (__kmp_pause_status == kmp_soft_paused) {
8984 __kmp_pause_status = kmp_not_paused;
  // Slot 0 is skipped by starting at gtid 1.
8986 for (
int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
8987 kmp_info_t *thread = __kmp_threads[gtid];
8989 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
8991 if (fl.is_sleeping())
8993 else if (__kmp_try_suspend_mx(thread)) {
8994 __kmp_unlock_suspend_mx(thread);
8997 if (fl.is_sleeping()) {
9000 }
else if (__kmp_try_suspend_mx(thread)) {
9001 __kmp_unlock_suspend_mx(thread);
// Handles a pause or resume request for the runtime.
//   level: kmp_not_paused (resume), kmp_soft_paused or kmp_hard_paused.
// Visible behaviour: a resume request tests whether the runtime is already
// not paused, and otherwise asserts that the current status is soft or hard
// paused before resetting it to kmp_not_paused. The soft and hard pause
// requests both test whether the status differs from kmp_not_paused.
// NOTE(review): every return statement and the calls performed on the
// successful pause paths are missing from this excerpt, so the return codes
// cannot be confirmed from here.
9013int __kmp_pause_resource(kmp_pause_status_t level) {
9014 if (level == kmp_not_paused) {
9015 if (__kmp_pause_status == kmp_not_paused) {
9019 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
9020 __kmp_pause_status == kmp_hard_paused);
9021 __kmp_pause_status = kmp_not_paused;
9024 }
else if (level == kmp_soft_paused) {
9025 if (__kmp_pause_status != kmp_not_paused) {
9032 }
else if (level == kmp_hard_paused) {
9033 if (__kmp_pause_status != kmp_not_paused) {
// Displays the environment settings through __kmp_display_env_impl().
//   verbose: passed as the second argument; its logical negation is passed as
//            the first argument.
// The whole operation runs with __kmp_initz_lock held, and serial
// initialization is performed first when __kmp_init_serial is still 0.
9046void __kmp_omp_display_env(
int verbose) {
9047 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
9048 if (__kmp_init_serial == 0)
9049 __kmp_do_serial_initialize();
9050 __kmp_display_env_impl(!verbose, verbose);
9051 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
// Transitions the worker threads of a team ahead of a change in team size
// when the fork/join barrier release pattern is the one asserted below.
//   team:         team being resized.
//   old_nthreads: current number of threads; workers 1 .. old_nthreads - 1
//                 are processed.
//   new_nthreads: used below in update_num_threads(); its declaration is not
//                 visible in this excerpt.
// Visible sequence per worker: read th_used_in_team; wait while it equals 3;
// assert it equals 1 and store 2. Afterwards the barrier object is released
// with go_release(), workers whose th_used_in_team is still non-zero are
// woken through __kmp_atomic_resume_64() when the default blocktime is not
// KMP_MAX_BLOCKTIME, and finally the barrier is told the new thread count
// and reset with go_reset().
// NOTE(review): the meaning of the th_used_in_team values 0 to 3, the
// counting loop around count and several branch bodies are missing from this
// excerpt.
9055void __kmp_resize_dist_barrier(kmp_team_t *team,
int old_nthreads,
9057 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
9059 kmp_info_t **other_threads = team->t.t_threads;
9063 for (
int f = 1; f < old_nthreads; ++f) {
9064 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
9066 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
  // Value 3 is waited out before the thread is touched.
9072 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
9073 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
9077 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
9079 team->t.t_threads[f]->th.th_used_in_team.store(2);
9080 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
9083 team->t.b->go_release();
9089 int count = old_nthreads - 1;
9091 count = old_nthreads - 1;
9092 for (
int f = 1; f < old_nthreads; ++f) {
9093 if (other_threads[f]->th.th_used_in_team.load() != 0) {
  // Threads may be asleep only when blocktime is finite, hence the wake-up.
9094 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9095 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9096 void *, other_threads[f]->th.th_sleep_loc);
9097 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9100 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9106 team->t.b->update_num_threads(new_nthreads);
9107 team->t.b->go_reset();
// Brings worker threads 1 .. new_nthreads - 1 into a team.
//   team:         team receiving the threads, asserted non-null.
//   new_nthreads: number of threads the team should have.
// Visible sequence per worker: a compare-and-store on th_used_in_team with
// expected value 0 (the new value is not visible), followed by a
// __kmp_resume_32() call with a null flag when the default blocktime is not
// KMP_MAX_BLOCKTIME. A second loop then inspects th_used_in_team for the
// value 1.
// NOTE(review): the replacement value of the compare-and-store and the
// counting loop around count are missing from this excerpt; presumably the
// function waits until every worker reports 1 - confirm.
9110void __kmp_add_threads_to_team(kmp_team_t *team,
int new_nthreads) {
9112 KMP_DEBUG_ASSERT(team);
9118 for (
int f = 1; f < new_nthreads; ++f) {
9119 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9120 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9122 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9123 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9124 (kmp_flag_32<false, false> *)NULL);
9130 int count = new_nthreads - 1;
9132 count = new_nthreads - 1;
9133 for (
int f = 1; f < new_nthreads; ++f) {
9134 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
// Global state for the hidden helper threads: the thread array base, the main
// helper thread, a counter of unexecuted helper tasks, the configured helper
// thread count with its enable flag, and a counter used while helper threads
// start up.
// NOTE(review): the count and the enable flag are each defined twice below
// (8 and TRUE, then 0 and FALSE); the preprocessor conditional that selects
// one pair is missing from this excerpt.
9142kmp_info_t **__kmp_hidden_helper_threads;
9143kmp_info_t *__kmp_hidden_helper_main_thread;
9144std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9146kmp_int32 __kmp_hidden_helper_threads_num = 8;
9147kmp_int32 __kmp_enable_hidden_helper = TRUE;
9149kmp_int32 __kmp_hidden_helper_threads_num = 0;
9150kmp_int32 __kmp_enable_hidden_helper = FALSE;
9154std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
// Entry routine run by the hidden helper threads.
//   gtid: pointer to the global thread id of the calling helper thread.
// Visible sequence: each caller increments
// __kmp_hit_hidden_helper_threads_num and spins until the counter equals
// __kmp_hidden_helper_threads_num. Then __kmp_init_hidden_helper_threads is
// cleared, the initialization waiters are released, the main helper thread
// waits, and afterwards the worker signal routine is called once for each
// index from 1 up to the counter value.
// NOTE(review): the condition restricting the second half to the main helper
// thread and the body of the spin loop are missing from this excerpt.
9156void __kmp_hidden_helper_wrapper_fn(
int *gtid,
int *, ...) {
9161 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9162 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9163 __kmp_hidden_helper_threads_num)
9169 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9170 __kmp_hidden_helper_initz_release();
9171 __kmp_hidden_helper_main_thread_wait();
  // Starts at 1, so one signal fewer than the counter value is sent.
9173 for (
int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9174 __kmp_hidden_helper_worker_thread_signal();
// Sets up the hidden helper team from a freshly registered root.
// Visible sequence: register a root with __kmp_register_root(TRUE), record
// the resulting thread as the main helper thread and its slot address in
// __kmp_threads as the helper thread array, request
// __kmp_hidden_helper_threads_num threads through th_set_nproc, reset the
// start-up counter to 0, clear __kmp_init_hidden_helper and release the
// deinitialization waiters.
// NOTE(review): the call that actually forks the helper team is missing from
// this excerpt; presumably it sits between the counter reset and the flag
// clear - confirm.
9180void __kmp_hidden_helper_threads_initz_routine() {
9182 const int gtid = __kmp_register_root(TRUE);
9183 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9184 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9185 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9186 __kmp_hidden_helper_threads_num;
9188 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9193 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9195 __kmp_hidden_helper_threads_deinitz_release();
// Allocates and zeroes the per-level thread count array used by nesting mode.
// The number of levels is KMP_HW_LAST; it is stored in
// __kmp_nesting_mode_nlevels, and __kmp_nested_nth.nth is grown to the same
// number of entries when its current size is smaller.
// NOTE(review): no allocation-failure check is visible for either the malloc
// or the realloc result in this excerpt, and the realloc result is written
// straight back over the old pointer; confirm a check exists on the missing
// lines.
9215void __kmp_init_nesting_mode() {
9216 int levels = KMP_HW_LAST;
9217 __kmp_nesting_mode_nlevels = levels;
9218 __kmp_nesting_nth_level = (
int *)KMP_INTERNAL_MALLOC(levels *
sizeof(
int));
9219 for (
int i = 0; i < levels; ++i)
9220 __kmp_nesting_nth_level[i] = 0;
9221 if (__kmp_nested_nth.size < levels) {
9222 __kmp_nested_nth.nth =
9223 (
int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels *
sizeof(
int));
9224 __kmp_nested_nth.size = levels;
// Fills the per-level thread counts for nesting mode and applies them to the
// calling thread.
// Visible behaviour:
//  - __kmp_nesting_mode == 1 selects KMP_MAX_ACTIVE_LEVELS_LIMIT levels, a
//    value above 1 selects that many levels.
//  - With a topology available, each level takes the ratio reported by
//    __kmp_topology->get_ratio() for successive hardware levels.
//  - Without one, at least 4 available processors give two levels
//    (__kmp_avail_proc / 2 and 2); fewer give a single level of
//    __kmp_avail_proc.
//  - The counts are copied into __kmp_nested_nth.nth, nproc of the calling
//    thread is set from level 0, and the number of levels is reconciled with
//    __kmp_nesting_mode and the max-active-levels setting of the thread.
// NOTE(review): declarations of loc and hw_level, the handling of ratio 1
// levels, the else keywords and closing braces are missing from this excerpt.
9229void __kmp_set_nesting_mode_threads() {
9230 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9232 if (__kmp_nesting_mode == 1)
9233 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9234 else if (__kmp_nesting_mode > 1)
9235 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9237 if (__kmp_topology) {
9239 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9240 loc < __kmp_nesting_mode_nlevels;
9241 loc++, hw_level++) {
9242 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9243 if (__kmp_nesting_nth_level[loc] == 1)
  // Adjust the last level when the product of all levels is below the number
  // of cores.
9247 if (__kmp_nesting_mode > 1 && loc > 1) {
9248 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9249 int num_cores = __kmp_topology->get_count(core_level);
9250 int upper_levels = 1;
9251 for (
int level = 0; level < loc - 1; ++level)
9252 upper_levels *= __kmp_nesting_nth_level[level];
  // NOTE(review): the guard uses the product of all upper levels, while the
  // division below uses only the single level at loc - 2; these agree only
  // when loc is 2 - confirm this is intended.
9253 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9254 __kmp_nesting_nth_level[loc - 1] =
9255 num_cores / __kmp_nesting_nth_level[loc - 2];
9257 __kmp_nesting_mode_nlevels = loc;
9258 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  // Settings used when no topology is available (the else keyword is not
  // visible in this excerpt).
9260 if (__kmp_avail_proc >= 4) {
9261 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9262 __kmp_nesting_nth_level[1] = 2;
9263 __kmp_nesting_mode_nlevels = 2;
9265 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9266 __kmp_nesting_mode_nlevels = 1;
9268 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  // Publish the computed counts to the nested-nth list.
9270 for (
int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9271 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9273 set__nproc(thread, __kmp_nesting_nth_level[0]);
9274 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9275 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9276 if (get__max_active_levels(thread) > 1) {
9278 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9280 if (__kmp_nesting_mode == 1)
9281 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
// Empty stand-ins and debugger-related globals.
// __kmp_reset_stats() is defined with an empty body when KMP_STATS_ENABLED is
// off; the two ITT routines are defined with empty bodies when either
// USE_ITT_BUILD or USE_ITT_NOTIFY is off. The two int globals start as FALSE.
// NOTE(review): the matching #endif directives are missing from this excerpt.
9286#if !KMP_STATS_ENABLED
9287void __kmp_reset_stats() {}
9290int __kmp_omp_debug_struct_info = FALSE;
9291int __kmp_debugging = FALSE;
9293#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
9294void __kmp_itt_fini_ittlib() {}
9295void __kmp_itt_init_ittlib() {}
KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid)
KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs, kmpc_micro microtask,...)
KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid)
#define KMP_INIT_PARTITIONED_TIMERS(name)
Initializes the partitioned timers to begin with name.
#define KMP_COUNT_VALUE(name, value)
Adds value to specified timer (name).
stats_state_e
the states which a thread can be in
KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid)