14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
21#include "kmp_settings.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
28#include "kmp_dispatch_hier.h"
32#include "ompt-specific.h"
35#include "ompd-specific.h"
38#if OMP_PROFILING_SUPPORT
39#include "llvm/Support/TimeProfiler.h"
40static char *ProfileTraceFile =
nullptr;
44#define KMP_USE_PRCTL 0
#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
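/* Look up the global thread id (gtid) of the calling thread. Depending on
   __kmp_gtid_mode, the gtid is read from thread-local data (TDATA), from keyed
   TLS, or found by an internal algorithm that scans each registered thread's
   recorded stack base and size for the stack containing a local variable's
   address; the matching entry's stack bounds may then be refined. */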
int __kmp_get_global_thread_id() {
  kmp_info_t **other_threads;

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
            __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))

  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));

  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();

  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);

  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);

  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
int __kmp_get_global_thread_id_reg() {

  if (!__kmp_init_serial) {

  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));

  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();

    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();

  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
      gtid = __kmp_register_root(FALSE);
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);

  KMP_DEBUG_ASSERT(gtid >= 0);
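/* Verify that the stack recorded for thread th does not overlap the stack of
   any other registered thread. When __kmp_storage_map is set, the stack extent
   is printed first; with __kmp_env_checks enabled (and for non-uber threads),
   every other thread's recorded stack range is compared against this one and
   an overlap aborts with a StackOverlap fatal message. */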
void __kmp_check_stack_overlap(kmp_info_t *th) {
  char *stack_beg = NULL;
  char *stack_end = NULL;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");

  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);

  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));

void __kmp_infinite_loop(void) {
  static int done = FALSE;

#define MAX_MESSAGE 512
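/* Print storage-map information for the address range [p1, p2] of the given
   size under the bootstrap stdio lock. With KMP_PRINT_DATA_PLACEMENT enabled
   and verbose mode on, the range is additionally broken into pages and the
   host memory node of each page group is reported. */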
void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
    if (__kmp_storage_map_verbose) {
      node = __kmp_get_host_node(p1);
        __kmp_storage_map_verbose = FALSE;

      int localProc = __kmp_get_cpu_from_gtid(gtid);

      const int page_size = KMP_GET_PAGE_SIZE();

      p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
      p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
      __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid,
        __kmp_printf_no_lock(" GTID %d\n", gtid);
          (char *&)p1 += page_size;
        } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
        __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1,
                             lastNode);
      __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
                           (char *)p1 + (page_size - 1),
                           __kmp_get_host_node(p1));
        __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
                             (char *)p2 + (page_size - 1),
                             __kmp_get_host_node(p2));
      __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning));

  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];

  if (__kmp_generate_warnings == kmp_warnings_off) {

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
void __kmp_abort_process() {

  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();

  if (KMP_OS_WINDOWS) {

    __kmp_global.g.g_abort = SIGABRT;

  __kmp_unregister_library();

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

void __kmp_abort_thread(void) {
  __kmp_infinite_loop();
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}

static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
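/* __kmp_parallel_deo / __kmp_parallel_dxo implement entry to and exit from an
   ordered section. With BUILD_PARALLEL_ORDERED, entry waits until the team's
   ordered ticket (t_ordered.dt.t_value) equals this thread's tid, and exit
   passes the ticket to the next tid; consistency checks push/pop the
   ct_ordered_in_parallel construct when enabled. */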
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif

#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,

void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);

#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {

    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
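/* Entry code for a SINGLE construct: each arriving thread bumps its local
   this_construct counter and tries an atomic compare-and-store on
   team->t.t_construct, so only one thread of the team claims the construct.
   Consistency checks and ITT single-start notification are applied when
   enabled; a serialized team takes the trivial path. */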
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {

    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;

    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);

    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {

      __kmp_itt_metadata_single(id_ref);

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
      __kmp_check_workshare(gtid, ct_psingle, id_ref);

    __kmp_itt_single_start(gtid);

void __kmp_exit_single(int gtid) {

  __kmp_itt_single_end(gtid);

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
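/* Decide how many threads can actually be reserved for a new team. Starting
   from the requested set_nthreads, the count is trimmed by the active dynamic
   mode (load balance, thread limit, or random), by KMP_DEVICE_THREAD_LIMIT and
   OMP_THREAD_LIMIT, and finally by the capacity of the __kmp_threads array
   (expanding it when possible). A warning is issued once when a non-dynamic
   request cannot be satisfied, and the adjusted count is handed back to the
   caller. */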
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {

#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));

    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));

  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));

    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
      new_nthreads = set_nthreads;
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));

      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);

    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));

    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {

    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);

    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));

    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;

  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {

  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;

  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {

    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {

      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                " %d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
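/* Populate a newly allocated (or reused hot) team: bind the primary thread to
   slot 0, allocate or recycle worker threads for the remaining slots, copy the
   teams-construct fields and barrier arrival counters into each worker, and,
   when affinity is supported and this is not a teams-workers fork, partition
   places across the team. */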
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());

  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;

  int level = team->t.t_active_level - 1;
  if (master_th->th.th_teams_microtask) {
    if (master_th->th.th_teams_size.nteams > 1) {

    if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
        master_th->th.th_teams_level == team->t.t_level) {

  if (level < __kmp_hot_teams_max_level) {
    if (hot_teams[level].hot_team) {

      KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);

      hot_teams[level].hot_team = team;
      hot_teams[level].hot_team_nth = team->t.t_nproc;

  use_hot_team = team == root->r.r_hot_team;

  team->t.t_threads[0] = master_th;
  __kmp_initialize_info(master_th, team, 0, master_gtid);

  for (i = 1; i < team->t.t_nproc; i++) {

    kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
    team->t.t_threads[i] = thr;
    KMP_DEBUG_ASSERT(thr);
    KMP_DEBUG_ASSERT(thr->th.th_team == team);

    KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                  "T#%d(%d:%d) join =%llu, plain=%llu\n",
                  __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                  __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                  team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                  team->t.t_bar[bs_plain_barrier].b_arrived));
    thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
    thr->th.th_teams_level = master_th->th.th_teams_level;
    thr->th.th_teams_size = master_th->th.th_teams_size;

    kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
    for (b = 0; b < bs_last_barrier; ++b) {
      balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
      KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);

      balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;

#if KMP_AFFINITY_SUPPORTED

  if (!fork_teams_workers) {
    __kmp_partition_places(team);

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
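/* propagateFPControl() captures the primary thread's x87 control word and
   MXCSR into the team when __kmp_inherit_fp_control is set;
   updateHWFPControl() reloads them in a worker whose current registers
   differ. On other architectures both collapse to no-op macros. */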
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);

    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);

    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);

inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {

    kmp_int16 x87_fpu_control_word;

    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);

#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc);
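/* Enter a serialized parallel region for the calling thread: allocate or reuse
   the thread's serial team, push the current task, inherit or override ICVs
   for the next nesting level, set up a dispatch buffer, and bump the nesting
   counters. OMPT parallel-begin/implicit-task callbacks are emitted when the
   tool interface is enabled. */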
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  KMP_DEBUG_ASSERT(serial_team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;

  this_thr->th.th_set_proc_bind = proc_bind_default;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);

  if (this_thr->th.th_team != serial_team) {

    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {

      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));

    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];

    serial_team->t.t_pkfn = (microtask_t)(~0);
    this_thr->th.th_info.ds.ds_tid = 0;

    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;

    propagateFPControl(serial_team);

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    int level = this_thr->th.th_team->t.t_level;

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);

    dispatch_private_info_t *disp_buffer =
        (dispatch_private_info_t *)__kmp_allocate(
            sizeof(dispatch_private_info_t));
    disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;

    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {

      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);

  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);

    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit);
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);

    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
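/* Central fork point for a parallel region. The primary thread reserves
   threads (possibly serializing), allocates or reuses a team, propagates ICVs,
   proc-bind and FP control, wires up task teams, then releases the workers via
   __kmp_internal_fork and, unless invoked from the GNU interface, runs the
   microtask itself through team->t.t_invoke. Teams constructs, nested
   serialized regions, OMPT callbacks and ITT frame reporting are handled along
   the way. */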
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context,
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
                    kmp_va_list ap) {

  int master_this_cons;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  int master_set_numthreads;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {

    void *dummy = KMP_ALLOCA(__kmp_stkpadding);

    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  master_th = __kmp_threads[gtid];

  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

  ompt_data_t ompt_parallel_data = ompt_data_none;
  ompt_data_t *parent_task_data;
  ompt_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);

    __kmp_assign_root_init_mask();

  level = parent_team->t.t_level;

  active_level = parent_team->t.t_active_level;

  teams_level = master_th->th.th_teams_level;
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;

    (*p_hot_teams)[0].hot_team_nth = 1;

  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      int flags = OMPT_INVOKER(call_context) |
                  ((microtask == (microtask_t)__kmp_teams_master)
                       ? ompt_parallel_league
                       : ompt_parallel_team);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,

    master_th->th.ompt_thread_info.state = ompt_state_overhead;
  master_th->th.th_ident = loc;

  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {

    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
      *argv++ = va_arg(kmp_va_deref(ap), void *);

    if (parent_team == master_th->th.th_serial_team) {

      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

      if (call_context == fork_context_gnu) {

        parent_team->t.t_serialized--;

      parent_team->t.t_pkfn = microtask;

      void **exit_frame_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          OMPT_CUR_TASK_INFO(master_th)->thread_num =
              __kmp_tid_from_gtid(gtid);
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

        exit_frame_p = &dummy;

      parent_team->t.t_serialized--;

        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv

      if (ompt_enabled.enabled) {
        *exit_frame_p = NULL;
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);

        ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
        __ompt_lw_taskteam_unlink(master_th);
        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context) | ompt_parallel_team,

        master_th->th.ompt_thread_info.state = ompt_state_overhead;

      parent_team->t.t_pkfn = microtask;
      parent_team->t.t_invoke = invoker;
      KMP_ATOMIC_INC(&root->r.r_in_parallel);
      parent_team->t.t_active_level++;
      parent_team->t.t_level++;
      parent_team->t.t_def_allocator = master_th->th.th_def_allocator;

      if (ompt_enabled.enabled) {
        ompt_lw_taskteam_t lw_taskteam;
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
      if (master_set_numthreads) {
        if (master_set_numthreads <= master_th->th.th_teams_size.nth) {

          kmp_info_t **other_threads = parent_team->t.t_threads;

          int old_proc = master_th->th.th_teams_size.nth;
          if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
              bp_dist_bar) {
            __kmp_resize_dist_barrier(parent_team, old_proc,
                                      master_set_numthreads);
            __kmp_add_threads_to_team(parent_team, master_set_numthreads);

          parent_team->t.t_nproc = master_set_numthreads;
          for (i = 0; i < master_set_numthreads; ++i) {
            other_threads[i]->th.th_team_nproc = master_set_numthreads;

        master_th->th.th_set_nproc = 0;

      if (__kmp_debugging) {
        int nth = __kmp_omp_num_threads(loc);

          master_set_numthreads = nth;

      kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;

      kmp_proc_bind_t proc_bind_icv = proc_bind_default;
      if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
        proc_bind = proc_bind_false;

      if (proc_bind == proc_bind_default) {
        proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;

      if ((level + 1 < __kmp_nested_proc_bind.used) &&
          (__kmp_nested_proc_bind.bind_types[level + 1] !=
           master_th->th.th_current_task->td_icvs.proc_bind)) {
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];

      KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);

      if (proc_bind_icv != proc_bind_default &&
          master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
        kmp_info_t **other_threads = parent_team->t.t_threads;
        for (i = 0; i < master_th->th.th_team_nproc; ++i) {
          other_threads[i]->th.th_current_task->td_icvs.proc_bind =

      master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
      if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
          __kmp_forkjoin_frames_mode == 3 &&
          parent_team->t.t_active_level == 1
          && master_th->th.th_teams_size.nteams == 1) {
        kmp_uint64 tmp_time = __itt_get_timestamp();
        master_th->th.th_frame_time = tmp_time;
        parent_team->t.t_region_time = tmp_time;

      if (__itt_stack_caller_create_ptr) {
        KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);

        parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

#if KMP_AFFINITY_SUPPORTED
      __kmp_partition_places(parent_team);

      KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                    "master_th=%p, gtid=%d\n",
                    root, parent_team, master_th, gtid));
      __kmp_internal_fork(loc, gtid, parent_team);
      KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                    "master_th=%p, gtid=%d\n",
                    root, parent_team, master_th, gtid));

      if (call_context == fork_context_gnu)

      KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                    parent_team->t.t_id, parent_team->t.t_pkfn));

      if (!parent_team->t.t_invoke(gtid)) {
        KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");

      KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                    parent_team->t.t_id, parent_team->t.t_pkfn));

      KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);

  int enter_teams = 0;
  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {

    enter_teams = ((ap == NULL && active_level == 0) ||
                   (ap && teams_level > 0 && teams_level == level));
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);

    if ((get__max_active_levels(master_th) == 1 &&
         (root->r.r_in_parallel && !enter_teams)) ||
        (__kmp_library == library_serial)) {
      KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      nthreads = __kmp_reserve_threads(root, parent_team, master_tid,
                                       nthreads, enter_teams);
      if (nthreads == 1) {

        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  KMP_DEBUG_ASSERT(nthreads > 0);

  master_th->th.th_set_nproc = 0;

  if (nthreads == 1) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    master_th->th.th_serial_team->t.t_pkfn = microtask;

    if (call_context == fork_context_intel) {

      master_th->th.th_serial_team->t.t_ident = loc;

        master_th->th.th_serial_team->t.t_level--;

        void **exit_frame_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);
          if (ompt_enabled.ompt_callback_implicit_task) {
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);

          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

          exit_frame_p = &dummy;

          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,

          master_th->th.ompt_thread_info.state = ompt_state_overhead;
    } else if (microtask == (microtask_t)__kmp_teams_master) {
      KMP_DEBUG_ASSERT(master_th->th.th_team ==
                       master_th->th.th_serial_team);
      team = master_th->th.th_team;

      team->t.t_invoke = invoker;
      __kmp_alloc_argv_entries(argc, team, TRUE);
      team->t.t_argc = argc;
      argv = (void **)team->t.t_argv;

      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);

      for (i = 0; i < argc; ++i)

        argv[i] = parent_team->t.t_argv[i];

      if (ompt_enabled.enabled) {
        ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, &(task_info->task_data), 0,
              OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);

        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              &ompt_parallel_data, parent_task_data,
              OMPT_INVOKER(call_context) | ompt_parallel_league,

        master_th->th.ompt_thread_info.state = ompt_state_overhead;

      for (i = argc - 1; i >= 0; --i)
        *argv++ = va_arg(kmp_va_deref(ap), void *);

        void **exit_frame_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_frame_p = &(task_info->frame.exit_frame.ptr);

          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
                ompt_task_implicit);
            OMPT_CUR_TASK_INFO(master_th)->thread_num =
                __kmp_tid_from_gtid(gtid);

          master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

          exit_frame_p = &dummy;

          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args

        if (ompt_enabled.enabled) {
          *exit_frame_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num,
                ompt_task_implicit);

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context) | ompt_parallel_team,

          master_th->th.ompt_thread_info.state = ompt_state_overhead;
    } else if (call_context == fork_context_gnu) {

      if (ompt_enabled.enabled) {
        ompt_lw_taskteam_t lwt;
        __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,

        lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
        __ompt_lw_taskteam_link(&lwt, master_th, 1);

      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));

      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));

  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));

  master_th->th.th_current_task->td_flags.executing = 0;

  if (!master_th->th.th_teams_microtask || level > teams_level) {

    KMP_ATOMIC_INC(&root->r.r_in_parallel);

  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];

  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;

  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;

    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;

    if (master_th->th.th_teams_microtask &&
        microtask == (microtask_t)__kmp_teams_master) {
      proc_bind = __kmp_teams_proc_bind;

    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {

      if (!master_th->th.th_teams_microtask ||
          !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
        proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];

  master_th->th.th_set_proc_bind = proc_bind_default;

  if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;

    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               proc_bind, &new_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);

    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)
      copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,
                &master_th->th.th_current_task->td_icvs);

  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);

  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);

  KMP_CHECK_UPDATE(team->t.t_invoke, invoker);

  if (!master_th->th.th_teams_microtask || level > teams_level) {
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);

    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);

  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);

  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
  KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);

  propagateFPControl(team);

  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_parallel_begin();

  if (__kmp_tasking_mode != tskm_immediate_exec) {

    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Primary T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {

      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) {
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;

        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;

        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);

      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (master_th->th.th_hot_teams &&
          active_level < __kmp_hot_teams_max_level &&
          team == master_th->th.th_hot_teams[active_level].hot_team) {

        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];

        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS

#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));

  KA_TRACE(20,
           ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
            gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,

  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));

  argv = (void **)team->t.t_argv;

  for (i = argc - 1; i >= 0; --i) {
    void *new_argv = va_arg(kmp_va_deref(ap), void *);
    KMP_CHECK_UPDATE(*argv, new_argv);

  for (i = 0; i < argc; ++i) {

    KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);

  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active)
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);
  master_th->th.ompt_thread_info.state = ompt_state_work_parallel;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

  if (team->t.t_active_level == 1
      && !master_th->th.th_teams_microtask) {

    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();

      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;

    if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {

      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);

  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);

  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

  if (__itt_stack_caller_create_ptr) {

    KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);
    team->t.t_stack_id = __kmp_itt_stack_caller_create();
  } else if (parent_team->t.t_serialized) {

    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

#if KMP_STATS_ENABLED

    KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);

  if (!team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");

#if KMP_STATS_ENABLED

    KMP_SET_THREAD_STATE(previous_state);

  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = ompt_state_overhead;
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {

  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? ompt_state_work_serial
                              : ompt_state_work_parallel);

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team, ompt_data_t *parallel_data,
                                   int flags, void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), flags, codeptr);

  task_info->frame.enter_frame = ompt_data_none;
  __kmp_join_restore_state(thread, team);
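/* Join point matching __kmp_fork_call. The primary thread waits for the team
   in __kmp_internal_join, tears down ITT stack/frame state, restores its
   task-team and affinity place information, emits OMPT implicit-task and
   parallel-end callbacks, and either pops back to the parent team or, for a
   teams construct, re-enlarges the team to the full teams size. */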
2334void __kmp_join_call(
ident_t *loc,
int gtid
2337 enum fork_context_e fork_context
2341 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
2343 kmp_team_t *parent_team;
2344 kmp_info_t *master_th;
2348 KA_TRACE(20, (
"__kmp_join_call: enter T#%d\n", gtid));
2351 master_th = __kmp_threads[gtid];
2352 root = master_th->th.th_root;
2353 team = master_th->th.th_team;
2354 parent_team = team->t.t_parent;
2356 master_th->th.th_ident = loc;
2359 void *team_microtask = (
void *)team->t.t_pkfn;
2363 if (ompt_enabled.enabled &&
2364 !(team->t.t_serialized && fork_context == fork_context_gnu)) {
2365 master_th->th.ompt_thread_info.state = ompt_state_overhead;
2370 if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
2371 KA_TRACE(20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, "
2372 "th_task_team = %p\n",
2373 __kmp_gtid_from_thread(master_th), team,
2374 team->t.t_task_team[master_th->th.th_task_state],
2375 master_th->th.th_task_team));
2376 KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
2377 team->t.t_task_team[master_th->th.th_task_state]);
2381 if (team->t.t_serialized) {
2382 if (master_th->th.th_teams_microtask) {
2384 int level = team->t.t_level;
2385 int tlevel = master_th->th.th_teams_level;
2386 if (level == tlevel) {
2390 } else if (level == tlevel + 1) {
2394 team->t.t_serialized++;
2400 if (ompt_enabled.enabled) {
2401 if (fork_context == fork_context_gnu) {
2402 __ompt_lw_taskteam_unlink(master_th);
2404 __kmp_join_restore_state(master_th, parent_team);
2411 master_active = team->t.t_master_active;
2416 __kmp_internal_join(loc, gtid, team);
2418 if (__itt_stack_caller_create_ptr) {
2419 KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);
2421 __kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);
2422 team->t.t_stack_id = NULL;
2426 master_th->th.th_task_state =
2429 if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) {
2430 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);
2434 __kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);
2435 parent_team->t.t_stack_id = NULL;
2439 if (team->t.t_nproc > 1 &&
2440 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2441 team->t.b->update_num_threads(team->t.t_nproc);
2442 __kmp_add_threads_to_team(team, team->t.t_nproc);
2449 ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
2450 void *codeptr = team->t.ompt_team_info.master_return_address;
2455 if (team->t.t_active_level == 1 &&
2456 (!master_th->th.th_teams_microtask ||
2457 master_th->th.th_teams_size.nteams == 1)) {
2458 master_th->th.th_ident = loc;
2461 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
2462 __kmp_forkjoin_frames_mode == 3)
2463 __kmp_itt_frame_submit(gtid, team->t.t_region_time,
2464 master_th->th.th_frame_time, 0, loc,
2465 master_th->th.th_team_nproc, 1);
2466 else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
2467 !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
2468 __kmp_itt_region_joined(gtid);
2472#if KMP_AFFINITY_SUPPORTED
2475 master_th->th.th_first_place = team->t.t_first_place;
2476 master_th->th.th_last_place = team->t.t_last_place;
2480 if (master_th->th.th_teams_microtask && !exit_teams &&
2481 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
2482 team->t.t_level == master_th->th.th_teams_level + 1) {
2487 ompt_data_t ompt_parallel_data = ompt_data_none;
2488 if (ompt_enabled.enabled) {
2489 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2490 if (ompt_enabled.ompt_callback_implicit_task) {
2491 int ompt_team_size = team->t.t_nproc;
2492 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2493 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2494 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
2496 task_info->frame.exit_frame = ompt_data_none;
2497 task_info->task_data = ompt_data_none;
2498 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
2499 __ompt_lw_taskteam_unlink(master_th);
2504 team->t.t_active_level--;
2505 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2511 if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
2512 int old_num = master_th->th.th_team_nproc;
2513 int new_num = master_th->th.th_teams_size.nth;
2514 kmp_info_t **other_threads = team->t.t_threads;
2515 team->t.t_nproc = new_num;
2516 for (int i = 0; i < old_num; ++i) {
2517 other_threads[i]->th.th_team_nproc = new_num;
2520 for (int i = old_num; i < new_num; ++i) {
2522 KMP_DEBUG_ASSERT(other_threads[i]);
2523 kmp_balign_t *balign = other_threads[i]->th.th_bar;
2524 for (int b = 0; b < bs_last_barrier; ++b) {
2525 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
2526 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
2528 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
2531 if (__kmp_tasking_mode != tskm_immediate_exec) {
2533 other_threads[i]->th.th_task_state = master_th->th.th_task_state;
2539 if (ompt_enabled.enabled) {
2540 __kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,
2541 OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);
2549 master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
2550 master_th->th.th_local.this_construct = team->t.t_master_this_cons;
2552 master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];
2557 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2559 if (!master_th->th.th_teams_microtask ||
2560 team->t.t_level > master_th->th.th_teams_level) {
2562 KMP_ATOMIC_DEC(&root->r.r_in_parallel);
2564 KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);
2567 if (ompt_enabled.enabled) {
2568 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2569 if (ompt_enabled.ompt_callback_implicit_task) {
2570 int flags = (team_microtask == (void *)__kmp_teams_master) ? ompt_task_initial
2572 : ompt_task_implicit;
2573 int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;
2574 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
2575 ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
2576 OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);
2578 task_info->frame.exit_frame = ompt_data_none;
2579 task_info->task_data = ompt_data_none;
2583 KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0, master_th, team));
2585 __kmp_pop_current_task_from_thread(master_th);
2587 master_th->th.th_def_allocator = team->t.t_def_allocator;
2590 if (ompd_state & OMPD_ENABLE_BP)
2591 ompd_bp_parallel_end();
2593 updateHWFPControl(team);
2595 if (root->r.r_active != master_active)
2596 root->r.r_active = master_active;
2598 __kmp_free_team(root, team USE_NESTED_HOT_ARG(master_th));
2606 master_th->th.th_team = parent_team;
2607 master_th->th.th_team_nproc = parent_team->t.t_nproc;
2608 master_th->th.th_team_master = parent_team->t.t_threads[0];
2609 master_th->th.th_team_serialized = parent_team->t.t_serialized;
2612 if (parent_team->t.t_serialized &&
2613 parent_team != master_th->th.th_serial_team &&
2614 parent_team != root->r.r_root_team) {
2615 __kmp_free_team(root,
2616 master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
2617 master_th->th.th_serial_team = parent_team;
2620 if (__kmp_tasking_mode != tskm_immediate_exec) {
2621 if (master_th->th.th_task_state_top > 0) {
2623 KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
2625 master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
2626 master_th->th.th_task_state;
2627 --master_th->th.th_task_state_top;
2629 master_th->th.th_task_state = master_th->th
2631 .th_task_state_memo_stack[master_th->th.th_task_state_top];
2634 master_th->th.th_task_team =
2635 parent_team->t.t_task_team[master_th->th.th_task_state];
2637 KA_TRACE(20, ("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",
2638 __kmp_gtid_from_thread(master_th), master_th->th.th_task_team, parent_team));
2645 master_th->th.th_current_task->td_flags.executing = 1;
2647 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2649#if KMP_AFFINITY_SUPPORTED
2650 if (master_th->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
2651 __kmp_reset_root_init_mask(gtid);
2656 int flags = OMPT_INVOKER(fork_context) |
2657 ((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league
2658 : ompt_parallel_team);
2659 if (ompt_enabled.enabled) {
2660 __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags, codeptr);
2666 KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
2671void __kmp_save_internal_controls(kmp_info_t *thread) {
2673 if (thread->th.th_team != thread->th.th_serial_team) {
2676 if (thread->th.th_team->t.t_serialized > 1) {
2679 if (thread->th.th_team->t.t_control_stack_top == NULL) {
2682 if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
2683 thread->th.th_team->t.t_serialized) {
2688 kmp_internal_control_t *control =
2689 (kmp_internal_control_t *)__kmp_allocate(
2690 sizeof(kmp_internal_control_t));
2692 copy_icvs(control, &thread->th.th_current_task->td_icvs);
2694 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
2696 control->next = thread->th.th_team->t.t_control_stack_top;
2697 thread->th.th_team->t.t_control_stack_top = control;
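// omp_set_num_threads: update the nproc ICV of the calling thread and, when
// the root's hot team is idle and larger than the new value, return the
// surplus hot-team workers to the thread pool immediately.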
2703void __kmp_set_num_threads(int new_nth, int gtid) {
2707 KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
2708 KMP_DEBUG_ASSERT(__kmp_init_serial);
2712 else if (new_nth > __kmp_max_nth)
2713 new_nth = __kmp_max_nth;
2716 thread = __kmp_threads[gtid];
2717 if (thread->th.th_current_task->td_icvs.nproc == new_nth)
2720 __kmp_save_internal_controls(thread);
2722 set__nproc(thread, new_nth);
2727 root = thread->th.th_root;
2728 if (__kmp_init_parallel && (!root->r.r_active) &&
2729 (root->r.r_hot_team->t.t_nproc > new_nth)
2730#if KMP_NESTED_HOT_TEAMS
2731 && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
2734 kmp_team_t *hot_team = root->r.r_hot_team;
2737 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
2739 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2740 __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
2743 for (f = new_nth; f < hot_team->t.t_nproc; f++) {
2744 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2745 if (__kmp_tasking_mode != tskm_immediate_exec) {
2748 hot_team->t.t_threads[f]->th.th_task_team = NULL;
2750 __kmp_free_thread(hot_team->t.t_threads[f]);
2751 hot_team->t.t_threads[f] = NULL;
2753 hot_team->t.t_nproc = new_nth;
2754#if KMP_NESTED_HOT_TEAMS
2755 if (thread->th.th_hot_teams) {
2756 KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
2757 thread->th.th_hot_teams[0].hot_team_nth = new_nth;
2761 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
2762 hot_team->t.b->update_num_threads(new_nth);
2763 __kmp_add_threads_to_team(hot_team, new_nth);
2766 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
2769 for (f = 0; f < new_nth; f++) {
2770 KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
2771 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
2774 hot_team->t.t_size_changed = -1;
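// omp_set_max_active_levels: negative values are ignored with a warning,
// values above KMP_MAX_ACTIVE_LEVELS_LIMIT are clamped, and the result is
// stored in the calling thread's ICVs.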
2779void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
2782 KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n",
2784 gtid, max_active_levels));
2785 KMP_DEBUG_ASSERT(__kmp_init_serial);
2788 if (max_active_levels < 0) {
2789 KMP_WARNING(ActiveLevelsNegative, max_active_levels);
2794 KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
2795 "max_active_levels for thread %d = (%d)\n",
2796 gtid, max_active_levels));
2799 if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
2804 KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
2805 KMP_MAX_ACTIVE_LEVELS_LIMIT);
2806 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
2812 KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
2813 "max_active_levels for thread %d = (%d)\n",
2814 gtid, max_active_levels));
2816 thread = __kmp_threads[gtid];
2818 __kmp_save_internal_controls(thread);
2820 set__max_active_levels(thread, max_active_levels);
2824int __kmp_get_max_active_levels(int gtid) {
2827 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
2828 KMP_DEBUG_ASSERT(__kmp_init_serial);
2830 thread = __kmp_threads[gtid];
2831 KMP_DEBUG_ASSERT(thread->th.th_current_task);
2832 KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
2833 "curtask_maxaclevel=%d\n",
2834 gtid, thread->th.th_current_task,
2835 thread->th.th_current_task->td_icvs.max_active_levels));
2836 return thread->th.th_current_task->td_icvs.max_active_levels;
2840void __kmp_set_num_teams(int num_teams) {
2842 __kmp_nteams = num_teams;
2844int __kmp_get_max_teams(void) { return __kmp_nteams; }
2846void __kmp_set_teams_thread_limit(int limit) {
2848 __kmp_teams_thread_limit = limit;
2850int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }
2852KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
2853KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));
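// omp_set_schedule: map the standard kmp_sched_t kind onto the internal
// sched_type table, re-apply any schedule modifiers, and record the chunk
// (falling back to the default chunk for auto or non-positive values).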
2856void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
2858 kmp_sched_t orig_kind;
2861 KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
2862 gtid, (int)kind, chunk));
2863 KMP_DEBUG_ASSERT(__kmp_init_serial);
2870 kind = __kmp_sched_without_mods(kind);
2872 if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
2873 (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
2875 __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
2876 KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"), __kmp_msg_null);
2878 kind = kmp_sched_default;
2882 thread = __kmp_threads[gtid];
2884 __kmp_save_internal_controls(thread);
2886 if (kind < kmp_sched_upper_std) {
2887 if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
2890 thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
2892 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2893 __kmp_sch_map[kind - kmp_sched_lower - 1];
2898 thread->th.th_current_task->td_icvs.sched.r_sched_type =
2899 __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
2900 kmp_sched_lower - 2];
2902 __kmp_sched_apply_mods_intkind(
2903 orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
2904 if (kind == kmp_sched_auto || chunk < 1) {
2906 thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
2908 thread->th.th_current_task->td_icvs.sched.chunk = chunk;
2913void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
2917 KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
2918 KMP_DEBUG_ASSERT(__kmp_init_serial);
2920 thread = __kmp_threads[gtid];
2922 th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;
2923 switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
2925 case kmp_sch_static_greedy:
2926 case kmp_sch_static_balanced:
2927 *kind = kmp_sched_static;
2928 __kmp_sched_apply_mods_stdkind(kind, th_type);
2931 case kmp_sch_static_chunked:
2932 *kind = kmp_sched_static;
2934 case kmp_sch_dynamic_chunked:
2935 *kind = kmp_sched_dynamic;
2938 case kmp_sch_guided_iterative_chunked:
2939 case kmp_sch_guided_analytical_chunked:
2940 *kind = kmp_sched_guided;
2943 *kind = kmp_sched_auto;
2945 case kmp_sch_trapezoidal:
2946 *kind = kmp_sched_trapezoidal;
2948#if KMP_STATIC_STEAL_ENABLED
2949 case kmp_sch_static_steal:
2950 *kind = kmp_sched_static_steal;
2954 KMP_FATAL(UnknownSchedulingType, th_type);
2957 __kmp_sched_apply_mods_stdkind(kind, th_type);
2958 *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
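// omp_get_ancestor_thread_num: walk up the team hierarchy from the calling
// thread, counting serialized levels, until the requested level is reached,
// and return the thread number held at that level.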
2961int __kmp_get_ancestor_thread_num(int gtid, int level) {
2967 KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
2968 KMP_DEBUG_ASSERT(__kmp_init_serial);
2975 thr = __kmp_threads[gtid];
2976 team = thr->th.th_team;
2977 ii = team->t.t_level;
2981 if (thr->th.th_teams_microtask) {
2983 int tlevel = thr->th.th_teams_level;
2986 KMP_DEBUG_ASSERT(ii >= tlevel);
2998 return __kmp_tid_from_gtid(gtid);
3000 dd = team->t.t_serialized;
3002 while (ii > level) {
3003 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3005 if ((team->t.t_serialized) && (!dd)) {
3006 team = team->t.t_parent;
3010 team = team->t.t_parent;
3011 dd = team->t.t_serialized;
3016 return (dd > 1) ? (0) : (team->t.t_master_tid);
3019int __kmp_get_team_size(int gtid, int level) {
3025 KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
3026 KMP_DEBUG_ASSERT(__kmp_init_serial);
3033 thr = __kmp_threads[gtid];
3034 team = thr->th.th_team;
3035 ii = team->t.t_level;
3039 if (thr->th.th_teams_microtask) {
3041 int tlevel = thr->th.th_teams_level;
3044 KMP_DEBUG_ASSERT(ii >= tlevel);
3055 while (ii > level) {
3056 for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
3058 if (team->t.t_serialized && (!dd)) {
3059 team = team->t.t_parent;
3063 team = team->t.t_parent;
3068 return team->t.t_nproc;
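// Build the default run-sched-var from the global __kmp_sched/__kmp_chunk
// settings, substituting the concrete static/guided method and the default
// chunk where no explicit value was given.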
3071kmp_r_sched_t __kmp_get_schedule_global() {
3076 kmp_r_sched_t r_sched;
3082 enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
3083 enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
3086 r_sched.r_sched_type = __kmp_static;
3089 r_sched.r_sched_type = __kmp_guided;
3091 r_sched.r_sched_type = __kmp_sched;
3093 SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
3095 if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
3097 r_sched.chunk = KMP_DEFAULT_CHUNK;
3099 r_sched.chunk = __kmp_chunk;
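// Ensure team->t.t_argv can hold argc entries: small argument lists use the
// inline array embedded in the team, larger ones get a page-allocated buffer.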
3107static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {
3109 KMP_DEBUG_ASSERT(team);
3110 if (!realloc || argc > team->t.t_max_argc) {
3112 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
3113 "current entries=%d\n",
3114 team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
3116 if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
3117 __kmp_free((void *)team->t.t_argv);
3119 if (argc <= KMP_INLINE_ARGV_ENTRIES) {
3121 team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3122 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
3124 team->t.t_id, team->t.t_max_argc));
3125 team->t.t_argv = &team->t.t_inline_argv[0];
3126 if (__kmp_storage_map) {
3127 __kmp_print_storage_map_gtid(
3128 -1, &team->t.t_inline_argv[0],
3129 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3130 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv", team->t.t_id);
3135 team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
3136 ? KMP_MIN_MALLOC_ARGV_ENTRIES
3138 KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
3140 team->t.t_id, team->t.t_max_argc));
3142 team->t.t_argv = (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
3143 if (__kmp_storage_map) {
3144 __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
3145 &team->t.t_argv[team->t.t_max_argc],
3146 sizeof(void *) * team->t.t_max_argc,
3147 "team_%d.t_argv", team->t.t_id);
3153static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
3155 int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
3157 team->t.t_threads = (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
3158 team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
3159 sizeof(dispatch_shared_info_t) * num_disp_buff);
3160 team->t.t_dispatch =
3161 (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
3162 team->t.t_implicit_task_taskdata =
3163 (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
3164 team->t.t_max_nproc = max_nth;
3167 for (i = 0; i < num_disp_buff; ++i) {
3168 team->t.t_disp_buffer[i].buffer_index = i;
3169 team->t.t_disp_buffer[i].doacross_buf_idx = i;
3173static void __kmp_free_team_arrays(kmp_team_t *team) {
3176 for (i = 0; i < team->t.t_max_nproc; ++i) {
3177 if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
3178 __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
3179 team->t.t_dispatch[i].th_disp_buffer = NULL;
3182#if KMP_USE_HIER_SCHED
3183 __kmp_dispatch_free_hierarchies(team);
3185 __kmp_free(team->t.t_threads);
3186 __kmp_free(team->t.t_disp_buffer);
3187 __kmp_free(team->t.t_dispatch);
3188 __kmp_free(team->t.t_implicit_task_taskdata);
3189 team->t.t_threads = NULL;
3190 team->t.t_disp_buffer = NULL;
3191 team->t.t_dispatch = NULL;
3192 team->t.t_implicit_task_taskdata = 0;
3195static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
3196 kmp_info_t **oldThreads = team->t.t_threads;
3198 __kmp_free(team->t.t_disp_buffer);
3199 __kmp_free(team->t.t_dispatch);
3200 __kmp_free(team->t.t_implicit_task_taskdata);
3201 __kmp_allocate_team_arrays(team, max_nth);
3203 KMP_MEMCPY(team->t.t_threads, oldThreads,
3204 team->t.t_nproc * sizeof(kmp_info_t *));
3206 __kmp_free(oldThreads);
3209static kmp_internal_control_t __kmp_get_global_icvs(void) {
3211 kmp_r_sched_t r_sched =
3212 __kmp_get_schedule_global();
3214 KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);
3216 kmp_internal_control_t g_icvs = {
3218 (kmp_int8)__kmp_global.g.g_dynamic,
3220 (kmp_int8)__kmp_env_blocktime,
3222 __kmp_dflt_blocktime,
3227 __kmp_dflt_team_nth,
3231 __kmp_dflt_max_active_levels,
3235 __kmp_nested_proc_bind.bind_types[0],
3236 __kmp_default_device,
3243static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {
3245 kmp_internal_control_t gx_icvs;
3246 gx_icvs.serial_nesting_level =
3248 copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
3249 gx_icvs.next = NULL;
3254static void __kmp_initialize_root(kmp_root_t *root) {
3256 kmp_team_t *root_team;
3257 kmp_team_t *hot_team;
3258 int hot_team_max_nth;
3259 kmp_r_sched_t r_sched =
3260 __kmp_get_schedule_global();
3261 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3262 KMP_DEBUG_ASSERT(root);
3263 KMP_ASSERT(!root->r.r_begin);
3266 __kmp_init_lock(&root->r.r_begin_lock);
3267 root->r.r_begin = FALSE;
3268 root->r.r_active = FALSE;
3269 root->r.r_in_parallel = 0;
3270 root->r.r_blocktime = __kmp_dflt_blocktime;
3271#if KMP_AFFINITY_SUPPORTED
3272 root->r.r_affinity_assigned = FALSE;
3277 KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));
3280 __kmp_allocate_team(root,
3286 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3288 USE_NESTED_HOT_ARG(NULL)
3293 TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));
3296 KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));
3298 root->r.r_root_team = root_team;
3299 root_team->t.t_control_stack_top = NULL;
3302 root_team->t.t_threads[0] = NULL;
3303 root_team->t.t_nproc = 1;
3304 root_team->t.t_serialized = 1;
3306 root_team->t.t_sched.sched = r_sched.sched;
3309 KF_TRACE(10, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3310 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
3314 KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));
3317 __kmp_allocate_team(root,
3319 __kmp_dflt_team_nth_ub * 2,
3323 __kmp_nested_proc_bind.bind_types[0], &r_icvs,
3325 USE_NESTED_HOT_ARG(NULL)
3327 KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));
3329 root->r.r_hot_team = hot_team;
3330 root_team->t.t_control_stack_top = NULL;
3333 hot_team->t.t_parent = root_team;
3336 hot_team_max_nth = hot_team->t.t_max_nproc;
3337 for (f = 0; f < hot_team_max_nth; ++f) {
3338 hot_team->t.t_threads[f] = NULL;
3340 hot_team->t.t_nproc = 1;
3342 hot_team->t.t_sched.sched = r_sched.sched;
3343 hot_team->t.t_size_changed = 0;
3348typedef struct kmp_team_list_item {
3349 kmp_team_p const *entry;
3350 struct kmp_team_list_item *next;
3351} kmp_team_list_item_t;
3352typedef kmp_team_list_item_t *kmp_team_list_t;
3354static void __kmp_print_structure_team_accum(
3355 kmp_team_list_t list,
3356 kmp_team_p const *team) {
3366 KMP_DEBUG_ASSERT(list != NULL);
3371 __kmp_print_structure_team_accum(list, team->t.t_parent);
3372 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3376 while (l->next != NULL && l->entry != team) {
3379 if (l->next != NULL) {
3385 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3391 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3392 sizeof(kmp_team_list_item_t));
3399static void __kmp_print_structure_team(char const *title, kmp_team_p const *team) {
3402 __kmp_printf("%s", title);
3404 __kmp_printf("%2x %p\n", team->t.t_id, team);
3406 __kmp_printf(" - (nil)\n");
3410static void __kmp_print_structure_thread(char const *title,
3411 kmp_info_p const *thread) {
3412 __kmp_printf("%s", title);
3413 if (thread != NULL) {
3414 __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
3416 __kmp_printf(" - (nil)\n");
3420void __kmp_print_structure(void) {
3422 kmp_team_list_t list;
3426 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));
3430 __kmp_printf("\n------------------------------\nGlobal Thread "
3431 "Table\n------------------------------\n");
3434 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3435 __kmp_printf("%2d", gtid);
3436 if (__kmp_threads != NULL) {
3437 __kmp_printf(" %p", __kmp_threads[gtid]);
3439 if (__kmp_root != NULL) {
3440 __kmp_printf(" %p", __kmp_root[gtid]);
3447 __kmp_printf("\n------------------------------\nThreads\n------------------------------\n");
3449 if (__kmp_threads != NULL) {
3451 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3452 kmp_info_t const *thread = __kmp_threads[gtid];
3453 if (thread != NULL) {
3454 __kmp_printf("GTID %2d %p:\n", gtid, thread);
3455 __kmp_printf(" Our Root: %p\n", thread->th.th_root);
3456 __kmp_print_structure_team(" Our Team: ", thread->th.th_team);
3457 __kmp_print_structure_team(" Serial Team: ",
3458 thread->th.th_serial_team);
3459 __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);
3460 __kmp_print_structure_thread(" Primary: ",
3461 thread->th.th_team_master);
3462 __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized);
3463 __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);
3464 __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3465 __kmp_print_structure_thread(" Next in pool: ",
3466 thread->th.th_next_pool);
3468 __kmp_print_structure_team_accum(list, thread->th.th_team);
3469 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3473 __kmp_printf("Threads array is not allocated.\n");
3477 __kmp_printf("\n------------------------------\nUbers\n------------------------------\n");
3479 if (__kmp_root != NULL) {
3481 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3482 kmp_root_t const *root = __kmp_root[gtid];
3484 __kmp_printf("GTID %2d %p:\n", gtid, root);
3485 __kmp_print_structure_team(" Root Team: ", root->r.r_root_team);
3486 __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);
3487 __kmp_print_structure_thread(" Uber Thread: ",
3488 root->r.r_uber_thread);
3489 __kmp_printf(" Active?: %2d\n", root->r.r_active);
3490 __kmp_printf(" In Parallel: %2d\n",
3491 KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));
3493 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3494 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3498 __kmp_printf("Ubers array is not allocated.\n");
3501 __kmp_printf("\n------------------------------\nTeams\n------------------------------\n");
3503 while (list->next != NULL) {
3504 kmp_team_p const *team = list->entry;
3506 __kmp_printf("Team %2x %p:\n", team->t.t_id, team);
3507 __kmp_print_structure_team(" Parent Team: ", team->t.t_parent);
3508 __kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid);
3509 __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);
3510 __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);
3511 __kmp_printf(" Number threads: %2d\n", team->t.t_nproc);
3512 for (i = 0; i < team->t.t_nproc; ++i) {
3513 __kmp_printf(" Thread %2d: ", i);
3514 __kmp_print_structure_thread("", team->t.t_threads[i]);
3516 __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);
3522 __kmp_printf("\n------------------------------\nPools\n------------------------------\n");
3524 __kmp_print_structure_thread("Thread pool: ",
3525 CCAST(kmp_info_t *, __kmp_thread_pool));
3526 __kmp_print_structure_team("Team pool: ",
3527 CCAST(kmp_team_t *, __kmp_team_pool));
3531 while (list != NULL) {
3532 kmp_team_list_item_t *item = list;
3534 KMP_INTERNAL_FREE(item);
3543static const unsigned __kmp_primes[] = {
3544 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
3545 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
3546 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
3547 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
3548 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
3549 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
3550 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
3551 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
3552 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
3553 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
3554 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};
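// Per-thread linear congruential generator: th_x is the state, th_a is a
// multiplier taken from __kmp_primes, and the high 16 bits of the state are
// returned as the pseudo-random value.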
3558unsigned short __kmp_get_random(kmp_info_t *thread) {
3559 unsigned x = thread->th.th_x;
3560 unsigned short r = (unsigned short)(x >> 16);
3562 thread->th.th_x = x * thread->th.th_a + 1;
3564 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
3565 thread->th.th_info.ds.ds_tid, r));
3571void __kmp_init_random(kmp_info_t *thread) {
3572 unsigned seed = thread->th.th_info.ds.ds_tid;
3575 thread->th.th_a = __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
3576 thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
3578 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
3584static int __kmp_reclaim_dead_roots(void) {
3587 for (i = 0; i < __kmp_threads_capacity; ++i) {
3588 if (KMP_UBER_GTID(i) &&
3589 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
3592 r += __kmp_unregister_root_other_thread(i);
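// Grow the __kmp_threads/__kmp_root arrays by at least nNeed slots, doubling
// capacity up to __kmp_sys_max_nth; the old arrays are kept on
// __kmp_old_threads_list so stale readers of the old pointer stay valid.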
3617static int __kmp_expand_threads(int nNeed) {
3619 int minimumRequiredCapacity;
3621 kmp_info_t **newThreads;
3622 kmp_root_t **newRoot;
3628#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB
3631 added = __kmp_reclaim_dead_roots();
3660 KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);
3663 if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
3667 minimumRequiredCapacity = __kmp_threads_capacity + nNeed;
3669 newCapacity = __kmp_threads_capacity;
3671 newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
3672 : __kmp_sys_max_nth;
3673 } while (newCapacity < minimumRequiredCapacity);
3674 newThreads = (kmp_info_t **)__kmp_allocate(
3675 (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
3677 newRoot = (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
3678 KMP_MEMCPY(newThreads, __kmp_threads,
3679 __kmp_threads_capacity * sizeof(kmp_info_t *));
3680 KMP_MEMCPY(newRoot, __kmp_root,
3681 __kmp_threads_capacity * sizeof(kmp_root_t *));
3684 kmp_old_threads_list_t *node =
3685 (kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));
3686 node->threads = __kmp_threads;
3687 node->next = __kmp_old_threads_list;
3688 __kmp_old_threads_list = node;
3690 *(kmp_info_t **volatile *)&__kmp_threads = newThreads;
3691 *(kmp_root_t **volatile *)&__kmp_root = newRoot;
3692 added += newCapacity - __kmp_threads_capacity;
3693 *(volatile int *)&__kmp_threads_capacity = newCapacity;
3695 if (newCapacity > __kmp_tp_capacity) {
3696 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
3697 if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
3698 __kmp_threadprivate_resize_cache(newCapacity);
3700 *(volatile int *)&__kmp_tp_capacity = newCapacity;
3702 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
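// Register the calling (or initial) thread as an OpenMP root: find a free
// gtid slot, set up the kmp_root_t with its root and hot teams, the uber
// thread's serial team, barrier state, affinity fields, and OMPT bookkeeping.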
3711int __kmp_register_root(int initial_thread) {
3712 kmp_info_t *root_thread;
3716 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3717 KA_TRACE(20, ("__kmp_register_root: entered\n"));
3734 capacity = __kmp_threads_capacity;
3735 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3742 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
3743 capacity -= __kmp_hidden_helper_threads_num;
3747 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3748 if (__kmp_tp_cached) {
3749 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3750 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3751 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3753 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
3763 if (TCR_4(__kmp_init_hidden_helper_threads)) {
3766 for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&
3767 gtid <= __kmp_hidden_helper_threads_num;
3770 KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);
3771 KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "
3772 "hidden helper thread: T#%d\n", gtid));
3778 if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3781 for (gtid = __kmp_hidden_helper_threads_num + 1;
3782 TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)
3786 KA_TRACE(1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3787 KMP_ASSERT(gtid < __kmp_threads_capacity);
3792 TCW_4(__kmp_nth, __kmp_nth + 1);
3796 if (__kmp_adjust_gtid_mode) {
3797 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3798 if (TCR_4(__kmp_gtid_mode) != 2) {
3799 TCW_4(__kmp_gtid_mode, 2);
3802 if (TCR_4(__kmp_gtid_mode) != 1) {
3803 TCW_4(__kmp_gtid_mode, 1);
3808#ifdef KMP_ADJUST_BLOCKTIME
3811 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3812 if (__kmp_nth > __kmp_avail_proc) {
3813 __kmp_zero_bt = TRUE;
3819 if (!(root = __kmp_root[gtid])) {
3820 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));
3821 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3824#if KMP_STATS_ENABLED
3826 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3827 __kmp_stats_thread_ptr->startLife();
3828 KMP_SET_THREAD_STATE(SERIAL_REGION);
3831 __kmp_initialize_root(root);
3834 if (root->r.r_uber_thread) {
3835 root_thread = root->r.r_uber_thread;
3837 root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
3838 if (__kmp_storage_map) {
3839 __kmp_print_thread_storage_map(root_thread, gtid);
3841 root_thread->th.th_info.ds.ds_gtid = gtid;
3843 root_thread->th.ompt_thread_info.thread_data = ompt_data_none;
3845 root_thread->th.th_root = root;
3846 if (__kmp_env_consistency_check) {
3847 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3850 __kmp_initialize_fast_memory(root_thread);
3854 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3855 __kmp_initialize_bget(root_thread);
3857 __kmp_init_random(root_thread);
3861 if (!root_thread->th.th_serial_team) {
3862 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3863 KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));
3864 root_thread->th.th_serial_team = __kmp_allocate_team(
3869 proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3871 KMP_ASSERT(root_thread->th.th_serial_team);
3872 KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",
3873 root_thread->th.th_serial_team));
3876 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3878 root->r.r_root_team->t.t_threads[0] = root_thread;
3879 root->r.r_hot_team->t.t_threads[0] = root_thread;
3880 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3882 root_thread->th.th_serial_team->t.t_serialized = 0;
3883 root->r.r_uber_thread = root_thread;
3886 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3887 TCW_4(__kmp_init_gtid, TRUE);
3890 __kmp_gtid_set_specific(gtid);
3893 __kmp_itt_thread_name(gtid);
3896#ifdef KMP_TDATA_GTID
3899 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3900 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3902 KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
3904 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3905 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3906 KMP_INIT_BARRIER_STATE));
3909 for (b = 0; b < bs_last_barrier; ++b) {
3910 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3912 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3916 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3917 KMP_INIT_BARRIER_STATE);
3919#if KMP_AFFINITY_SUPPORTED
3920 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3921 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3922 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3923 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3925 root_thread->th.th_def_allocator = __kmp_def_allocator;
3926 root_thread->th.th_prev_level = 0;
3927 root_thread->th.th_prev_num_threads = 1;
3929 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
3930 tmp->cg_root = root_thread;
3931 tmp->cg_thread_limit = __kmp_cg_max_nth;
3932 tmp->cg_nthreads = 1;
3933 KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"
3934 " cg_nthreads init to 1\n", root_thread, tmp));
3937 root_thread->th.th_cg_roots = tmp;
3939 __kmp_root_counter++;
3942 if (!initial_thread && ompt_enabled.enabled) {
3944 kmp_info_t *root_thread = ompt_get_thread();
3946 ompt_set_thread_state(root_thread, ompt_state_overhead);
3948 if (ompt_enabled.ompt_callback_thread_begin) {
3949 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3950 ompt_thread_initial, __ompt_get_thread_data_internal());
3952 ompt_data_t *task_data;
3953 ompt_data_t *parallel_data;
3954 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data, NULL);
3956 if (ompt_enabled.ompt_callback_implicit_task) {
3957 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
3958 ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);
3961 ompt_set_thread_state(root_thread, ompt_state_work_serial);
3965 if (ompd_state & OMPD_ENABLE_BP)
3966 ompd_bp_thread_begin();
3970 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
3975#if KMP_NESTED_HOT_TEAMS
3976static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
3977 const int max_level) {
3979 kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
3980 if (!hot_teams || !hot_teams[level].hot_team) {
3983 KMP_DEBUG_ASSERT(level < max_level);
3984 kmp_team_t *team = hot_teams[level].hot_team;
3985 nth = hot_teams[level].hot_team_nth;
3987 if (level < max_level - 1) {
3988 for (i = 0; i < nth; ++i) {
3989 kmp_info_t *th = team->t.t_threads[i];
3990 n += __kmp_free_hot_teams(root, th, level + 1, max_level);
3991 if (i > 0 && th->th.th_hot_teams) {
3992 __kmp_free(th->th.th_hot_teams);
3993 th->th.th_hot_teams = NULL;
3997 __kmp_free_team(root, team, NULL);
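// Tear down an inactive root: free its root and hot teams (including nested
// hot teams), run OMPD/OMPT end callbacks for the uber thread, and reap it.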
4004static int __kmp_reset_root(int gtid, kmp_root_t *root) {
4005 kmp_team_t *root_team = root->r.r_root_team;
4006 kmp_team_t *hot_team = root->r.r_hot_team;
4007 int n = hot_team->t.t_nproc;
4010 KMP_DEBUG_ASSERT(!root->r.r_active);
4012 root->r.r_root_team = NULL;
4013 root->r.r_hot_team = NULL;
4016 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
4017#if KMP_NESTED_HOT_TEAMS
4018 if (__kmp_hot_teams_max_level > 0) {
4020 for (i = 0; i < hot_team->t.t_nproc; ++i) {
4021 kmp_info_t *th = hot_team->t.t_threads[i];
4022 if (__kmp_hot_teams_max_level > 1) {
4023 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
4025 if (th->th.th_hot_teams) {
4026 __kmp_free(th->th.th_hot_teams);
4027 th->th.th_hot_teams = NULL;
4032 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
4037 if (__kmp_tasking_mode != tskm_immediate_exec) {
4038 __kmp_wait_to_unref_task_teams();
4044 KA_TRACE(10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
4046 (LPVOID) & (root->r.r_uber_thread->th),
4047 root->r.r_uber_thread->th.th_info.ds.ds_thread));
4048 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
4052 if (ompd_state & OMPD_ENABLE_BP)
4053 ompd_bp_thread_end();
4057 ompt_data_t *task_data;
4058 ompt_data_t *parallel_data;
4059 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, &parallel_data, NULL);
4061 if (ompt_enabled.ompt_callback_implicit_task) {
4062 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
4063 ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);
4065 if (ompt_enabled.ompt_callback_thread_end) {
4066 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
4067 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
4073 i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;
4074 KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p to %d\n",
4076 root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,
4077 root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));
4080 KMP_DEBUG_ASSERT(root->r.r_uber_thread ==
4081 root->r.r_uber_thread->th.th_cg_roots->cg_root);
4082 KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);
4083 __kmp_free(root->r.r_uber_thread->th.th_cg_roots);
4084 root->r.r_uber_thread->th.th_cg_roots = NULL;
4086 __kmp_reap_thread(root->r.r_uber_thread, 1);
4090 root->r.r_uber_thread = NULL;
4092 root->r.r_begin = FALSE;
4097void __kmp_unregister_root_current_thread(int gtid) {
4098 KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
4102 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
4103 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
4104 KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid));
4107 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4110 kmp_root_t *root = __kmp_root[gtid];
4112 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4113 KMP_ASSERT(KMP_UBER_GTID(gtid));
4114 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4115 KMP_ASSERT(root->r.r_active == FALSE);
4119 kmp_info_t *thread = __kmp_threads[gtid];
4120 kmp_team_t *team = thread->th.th_team;
4121 kmp_task_team_t *task_team = thread->th.th_task_team;
4124 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
4125 task_team->tt.tt_hidden_helper_task_encountered)) {
4128 thread->th.ompt_thread_info.state = ompt_state_undefined;
4130 __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
4133 __kmp_reset_root(gtid, root);
4137 KA_TRACE(1, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));
4139 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
4146static int __kmp_unregister_root_other_thread(int gtid) {
4147 kmp_root_t *root = __kmp_root[gtid];
4150 KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
4151 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
4152 KMP_ASSERT(KMP_UBER_GTID(gtid));
4153 KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
4154 KMP_ASSERT(root->r.r_active == FALSE);
4156 r = __kmp_reset_root(gtid, root);
4158 KA_TRACE(1, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
4164void __kmp_task_info() {
4166 kmp_int32 gtid = __kmp_entry_gtid();
4167 kmp_int32 tid = __kmp_tid_from_gtid(gtid);
4168 kmp_info_t *this_thr = __kmp_threads[gtid];
4169 kmp_team_t *steam = this_thr->th.th_serial_team;
4170 kmp_team_t *team = this_thr->th.th_team;
4173 "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "
4175 gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,
4176 team->t.t_implicit_task_taskdata[tid].td_parent);
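// (Re)bind a kmp_info_t to a slot of the given team: set tid/team/root links,
// initialize its implicit task and dispatch buffers, attach it to the right
// contention group, and lazily allocate threadprivate and task-state storage.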
4183static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4184 int tid, int gtid) {
4188 KMP_DEBUG_ASSERT(this_thr != NULL);
4189 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4190 KMP_DEBUG_ASSERT(team);
4191 KMP_DEBUG_ASSERT(team->t.t_threads);
4192 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4193 kmp_info_t *master = team->t.t_threads[0];
4194 KMP_DEBUG_ASSERT(master);
4195 KMP_DEBUG_ASSERT(master->th.th_root);
4199 TCW_SYNC_PTR(this_thr->th.th_team, team);
4201 this_thr->th.th_info.ds.ds_tid = tid;
4202 this_thr->th.th_set_nproc = 0;
4203 if (__kmp_tasking_mode != tskm_immediate_exec)
4206 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4208 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4209 this_thr->th.th_set_proc_bind = proc_bind_default;
4210#if KMP_AFFINITY_SUPPORTED
4211 this_thr->th.th_new_place = this_thr->th.th_current_place;
4213 this_thr->th.th_root = master->th.th_root;
4216 this_thr->th.th_team_nproc = team->t.t_nproc;
4217 this_thr->th.th_team_master = master;
4218 this_thr->th.th_team_serialized = team->t.t_serialized;
4220 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4222 KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4223 tid, gtid, this_thr, this_thr->th.th_current_task));
4225 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE);
4228 KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4229 tid, gtid, this_thr, this_thr->th.th_current_task));
4234 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4236 this_thr->th.th_local.this_construct = 0;
4238 if (!this_thr->th.th_pri_common) {
4239 this_thr->th.th_pri_common =
4240 (struct common_table *)__kmp_allocate(sizeof(struct common_table));
4241 if (__kmp_storage_map) {
4242 __kmp_print_storage_map_gtid(
4243 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4244 sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);
4246 this_thr->th.th_pri_head = NULL;
4249 if (this_thr != master &&
4250 this_thr->th.th_cg_roots != master->th.th_cg_roots) {
4252 KMP_DEBUG_ASSERT(master->th.th_cg_roots);
4253 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
4256 int i = tmp->cg_nthreads--;
4257 KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"
4258 " on node %p of thread %p to %d\n",
4259 this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));
4264 this_thr->th.th_cg_roots = master->th.th_cg_roots;
4266 this_thr->th.th_cg_roots->cg_nthreads++;
4267 KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"
4268 " node %p of thread %p to %d\n",
4269 this_thr, this_thr->th.th_cg_roots,
4270 this_thr->th.th_cg_roots->cg_root,
4271 this_thr->th.th_cg_roots->cg_nthreads));
4272 this_thr->th.th_current_task->td_icvs.thread_limit =
4273 this_thr->th.th_cg_roots->cg_thread_limit;
4278 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4281 size_t disp_size = sizeof(dispatch_private_info_t) *
4282 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4283 KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4284 team->t.t_max_nproc));
4285 KMP_ASSERT(dispatch);
4286 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4287 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4289 dispatch->th_disp_index = 0;
4290 dispatch->th_doacross_buf_idx = 0;
4291 if (!dispatch->th_disp_buffer) {
4292 dispatch->th_disp_buffer =
4293 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4295 if (__kmp_storage_map) {
4296 __kmp_print_storage_map_gtid(
4297 gtid, &dispatch->th_disp_buffer[0],
4298 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4300 : __kmp_dispatch_num_buffers],
4302 "th_%d.th_dispatch.th_disp_buffer "
4303 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4304 gtid, team->t.t_id, gtid);
4307 memset(&dispatch->th_disp_buffer[0], '\0', disp_size);
4310 dispatch->th_dispatch_pr_current = 0;
4311 dispatch->th_dispatch_sh_current = 0;
4313 dispatch->th_deo_fcn = 0;
4314 dispatch->th_dxo_fcn = 0;
4317 this_thr->th.th_next_pool = NULL;
4319 if (!this_thr->th.th_task_state_memo_stack) {
4321 this_thr->th.th_task_state_memo_stack =
4322 (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
4323 this_thr->th.th_task_state_top = 0;
4324 this_thr->th.th_task_state_stack_sz = 4;
4325 for (i = 0; i < this_thr->th.th_task_state_stack_sz; ++i)
4327 this_thr->th.th_task_state_memo_stack[i] = 0;
4330 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4331 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
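// Obtain a worker for a team slot: reuse a thread from __kmp_thread_pool when
// available, otherwise pick a fresh gtid, allocate and initialize a new
// kmp_info_t (serial team, barriers, RNG seed) and start the OS worker thread.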
4341kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, int new_tid) {
4343 kmp_team_t *serial_team;
4344 kmp_info_t *new_thr;
4347 KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4348 KMP_DEBUG_ASSERT(root && team);
4349#if !KMP_NESTED_HOT_TEAMS
4350 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4355 if (__kmp_thread_pool) {
4356 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4357 __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;
4358 if (new_thr == __kmp_thread_pool_insert_pt) {
4359 __kmp_thread_pool_insert_pt = NULL;
4361 TCW_4(new_thr->th.th_in_pool, FALSE);
4362 __kmp_suspend_initialize_thread(new_thr);
4363 __kmp_lock_suspend_mx(new_thr);
4364 if (new_thr->th.th_active_in_pool == TRUE) {
4365 KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);
4366 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
4367 new_thr->th.th_active_in_pool = FALSE;
4369 __kmp_unlock_suspend_mx(new_thr);
4371 KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
4372 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4373 KMP_ASSERT(!new_thr->th.th_team);
4374 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4377 __kmp_initialize_info(new_thr, team, new_tid,
4378 new_thr->th.th_info.ds.ds_gtid);
4379 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4381 TCW_4(__kmp_nth, __kmp_nth + 1);
4383 new_thr->th.th_task_state = 0;
4384 new_thr->th.th_task_state_top = 0;
4385 new_thr->th.th_task_state_stack_sz = 4;
4387 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
4389 KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);
4393#ifdef KMP_ADJUST_BLOCKTIME
4396 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4397 if (__kmp_nth > __kmp_avail_proc) {
4398 __kmp_zero_bt = TRUE;
4407 kmp_balign_t *balign = new_thr->th.th_bar;
4408 for (b = 0; b < bs_last_barrier; ++b)
4409 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4412 KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4413 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4420 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4421 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4426 if (!TCR_4(__kmp_init_monitor)) {
4427 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4428 if (!TCR_4(__kmp_init_monitor)) {
4429 KF_TRACE(10, ("before __kmp_create_monitor\n"));
4430 TCW_4(__kmp_init_monitor, 1);
4431 __kmp_create_monitor(&__kmp_monitor);
4432 KF_TRACE(10, ("after __kmp_create_monitor\n"));
4443 while (TCR_4(__kmp_init_monitor) < 2) {
4446 KF_TRACE(10, ("after monitor thread has started\n"));
4449 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4456 int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads) ? 1
4458 : __kmp_hidden_helper_threads_num + 1;
4460 for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;
4462 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4465 if (TCR_4(__kmp_init_hidden_helper_threads)) {
4466 KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);
4471 new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
4473 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4475#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
4478 __itt_suppress_mark_range(
4479 __itt_suppress_range, __itt_suppress_threading_errors,
4480 &new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));
4481 __itt_suppress_mark_range(
4482 __itt_suppress_range, __itt_suppress_threading_errors,
4483 &new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));
4485 __itt_suppress_mark_range(
4486 __itt_suppress_range, __itt_suppress_threading_errors,
4487 &new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));
4489 __itt_suppress_mark_range(__itt_suppress_range,
4490 __itt_suppress_threading_errors,
4491 &new_thr->th.th_suspend_init_count,
4492 sizeof(new_thr->th.th_suspend_init_count));
4495 __itt_suppress_mark_range(__itt_suppress_range,
4496 __itt_suppress_threading_errors,
4497 CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),
4498 sizeof(new_thr->th.th_bar[0].bb.b_go));
4499 __itt_suppress_mark_range(__itt_suppress_range,
4500 __itt_suppress_threading_errors,
4501 CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),
4502 sizeof(new_thr->th.th_bar[1].bb.b_go));
4503 __itt_suppress_mark_range(__itt_suppress_range,
4504 __itt_suppress_threading_errors,
4505 CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),
4506 sizeof(new_thr->th.th_bar[2].bb.b_go));
4508 if (__kmp_storage_map) {
4509 __kmp_print_thread_storage_map(new_thr, new_gtid);
4514 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4515 KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
4516 new_thr->th.th_serial_team = serial_team =
4517 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4521 proc_bind_default, &r_icvs,
4522 0 USE_NESTED_HOT_ARG(NULL));
4524 KMP_ASSERT(serial_team);
4525 serial_team->t.t_serialized = 0;
4527 serial_team->t.t_threads[0] = new_thr;
4529 KF_TRACE(10, ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n", new_thr));
4533 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4536 __kmp_initialize_fast_memory(new_thr);
4540 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4541 __kmp_initialize_bget(new_thr);
4544 __kmp_init_random(new_thr);
4548 KA_TRACE(20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4549 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4552 kmp_balign_t *balign = new_thr->th.th_bar;
4553 for (b = 0; b < bs_last_barrier; ++b) {
4554 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4555 balign[b].bb.team = NULL;
4556 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4557 balign[b].bb.use_oncore_barrier = 0;
4560 TCW_PTR(new_thr->th.th_sleep_loc, NULL);
4561 new_thr->th.th_sleep_loc_type = flag_unset;
4563 new_thr->th.th_spin_here = FALSE;
4564 new_thr->th.th_next_waiting = 0;
4566 new_thr->th.th_blocking = false;
4569#if KMP_AFFINITY_SUPPORTED
4570 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4571 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4572 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4573 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4575 new_thr->th.th_def_allocator = __kmp_def_allocator;
4576 new_thr->th.th_prev_level = 0;
4577 new_thr->th.th_prev_num_threads = 1;
4579 TCW_4(new_thr->th.th_in_pool, FALSE);
4580 new_thr->th.th_active_in_pool = FALSE;
4581 TCW_4(new_thr->th.th_active, TRUE);
4589 if (__kmp_adjust_gtid_mode) {
4590 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4591 if (TCR_4(__kmp_gtid_mode) != 2) {
4592 TCW_4(__kmp_gtid_mode, 2);
4595 if (TCR_4(__kmp_gtid_mode) != 1) {
4596 TCW_4(__kmp_gtid_mode, 1);
4601#ifdef KMP_ADJUST_BLOCKTIME
4604 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4605 if (__kmp_nth > __kmp_avail_proc) {
4606 __kmp_zero_bt = TRUE;
4613 KF_TRACE(10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4614 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4616 KF_TRACE(10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4618 KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid));
4629static void __kmp_reinitialize_team(kmp_team_t *team,
4630 kmp_internal_control_t *new_icvs, ident_t *loc) {
4632 KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
4633 team->t.t_threads[0], team));
4634 KMP_DEBUG_ASSERT(team && new_icvs);
4635 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
4636 KMP_CHECK_UPDATE(team->t.t_ident, loc);
4638 KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
4640 __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
4641 copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);
4643 KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
4644 team->t.t_threads[0], team));
4650static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
4651 kmp_internal_control_t *new_icvs, ident_t *loc) {
4653 KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));
4656 KMP_DEBUG_ASSERT(team);
4657 KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
4658 KMP_DEBUG_ASSERT(team->t.t_threads);
4661 team->t.t_master_tid = 0;
4663 team->t.t_serialized = new_nproc > 1 ? 0 : 1;
4664 team->t.t_nproc = new_nproc;
4667 team->t.t_next_pool = NULL;
4671 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
4672 team->t.t_invoke = NULL;
4675 team->t.t_sched.sched = new_icvs->sched.sched;
4677#if KMP_ARCH_X86 || KMP_ARCH_X86_64
4678 team->t.t_fp_control_saved = FALSE;
4679 team->t.t_x87_fpu_control_word = 0;
4680 team->t.t_mxcsr = 0;
4683 team->t.t_construct = 0;
4685 team->t.t_ordered.dt.t_value = 0;
4686 team->t.t_master_active = FALSE;
4689 team->t.t_copypriv_data = NULL;
4692 team->t.t_copyin_counter = 0;
4695 team->t.t_control_stack_top = NULL;
4697 __kmp_reinitialize_team(team, new_icvs, loc);
4700 KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
4703#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
4706static void __kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
4707 if (KMP_AFFINITY_CAPABLE()) {
4709 if (old_mask != NULL) {
4710 status = __kmp_get_system_affinity(old_mask, TRUE);
4713 __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error), __kmp_msg_null);
4717 __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
4722#if KMP_AFFINITY_SUPPORTED
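// Distribute the primary thread's place partition over the team according to
// the proc_bind policy: "primary" keeps all threads on the primary's place,
// "close" packs threads onto nearby places, "spread" gives each thread its
// own sub-partition of the places.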
4728static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
4730 if (KMP_HIDDEN_HELPER_TEAM(team))
4733 kmp_info_t *master_th = team->t.t_threads[0];
4734 KMP_DEBUG_ASSERT(master_th != NULL);
4735 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4736 int first_place = master_th->th.th_first_place;
4737 int last_place = master_th->th.th_last_place;
4738 int masters_place = master_th->th.th_current_place;
4739 team->t.t_first_place = first_place;
4740 team->t.t_last_place = last_place;
4742 KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "
4743 "bound to place %d partition = [%d,%d]\n",
4744 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4745 team->t.t_id, masters_place, first_place, last_place));
4747 switch (proc_bind) {
4749 case proc_bind_default:
4752 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4755 case proc_bind_primary: {
4757 int n_th = team->t.t_nproc;
4758 for (f = 1; f < n_th; f++) {
4759 kmp_info_t *th = team->t.t_threads[f];
4760 KMP_DEBUG_ASSERT(th != NULL);
4761 th->th.th_first_place = first_place;
4762 th->th.th_last_place = last_place;
4763 th->th.th_new_place = masters_place;
4764 if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
4765 team->t.t_display_affinity != 1) {
4766 team->t.t_display_affinity = 1;
4769 KA_TRACE(100, (
"__kmp_partition_places: primary: T#%d(%d:%d) place %d "
4770 "partition = [%d,%d]\n",
4771 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4772 f, masters_place, first_place, last_place));
4776 case proc_bind_close: {
4778 int n_th = team->t.t_nproc;
4780 if (first_place <= last_place) {
4781 n_places = last_place - first_place + 1;
4783 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4785 if (n_th <= n_places) {
4786 int place = masters_place;
4787 for (f = 1; f < n_th; f++) {
4788 kmp_info_t *th = team->t.t_threads[f];
4789 KMP_DEBUG_ASSERT(th != NULL);
4791 if (place == last_place) {
4792 place = first_place;
4793 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4798 th->th.th_first_place = first_place;
4799 th->th.th_last_place = last_place;
4800 th->th.th_new_place = place;
4801 if (__kmp_display_affinity && place != th->th.th_current_place &&
4802 team->t.t_display_affinity != 1) {
4803 team->t.t_display_affinity = 1;
4806 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4807 "partition = [%d,%d]\n",
4808 __kmp_gtid_from_thread(team->t.t_threads[f]),
4809 team->t.t_id, f, place, first_place, last_place));
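// When the team has more threads than places (n_th > n_places), both the
// close and spread cases fall back to a block distribution: S = n_th /
// n_places threads per place, with the remaining rem = n_th - S * n_places
// threads handed out one extra per 'gap' places. Illustrative example:
// 10 threads over 4 places gives S = 2, rem = 2, gap = 2, so the four
// places receive 2, 3, 2, and 3 threads.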
4812 int S, rem, gap, s_count;
4813 S = n_th / n_places;
4815 rem = n_th - (S * n_places);
4816 gap = rem > 0 ? n_places / rem : n_places;
4817 int place = masters_place;
4819 for (f = 0; f < n_th; f++) {
4820 kmp_info_t *th = team->t.t_threads[f];
4821 KMP_DEBUG_ASSERT(th != NULL);
4823 th->th.th_first_place = first_place;
4824 th->th.th_last_place = last_place;
4825 th->th.th_new_place = place;
4826 if (__kmp_display_affinity && place != th->th.th_current_place &&
4827 team->t.t_display_affinity != 1) {
4828 team->t.t_display_affinity = 1;
4832 if ((s_count == S) && rem && (gap_ct == gap)) {
4834 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4836 if (place == last_place) {
4837 place = first_place;
4838 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4846 }
else if (s_count == S) {
4847 if (place == last_place) {
4848 place = first_place;
4849 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4859 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d "
4860 "partition = [%d,%d]\n",
4861 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4862 th->th.th_new_place, first_place, last_place));
4864 KMP_DEBUG_ASSERT(place == masters_place);
4868 case proc_bind_spread: {
4870 int n_th = team->t.t_nproc;
4873 if (first_place <= last_place) {
4874 n_places = last_place - first_place + 1;
4876 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4878 if (n_th <= n_places) {
4881 if (n_places !=
static_cast<int>(__kmp_affinity_num_masks)) {
4882 int S = n_places / n_th;
4883 int s_count, rem, gap, gap_ct;
4885 place = masters_place;
4886 rem = n_places - n_th * S;
4887 gap = rem ? n_th / rem : 1;
4890 if (update_master_only == 1)
4892 for (f = 0; f < thidx; f++) {
4893 kmp_info_t *th = team->t.t_threads[f];
4894 KMP_DEBUG_ASSERT(th != NULL);
4896 th->th.th_first_place = place;
4897 th->th.th_new_place = place;
4898 if (__kmp_display_affinity && place != th->th.th_current_place &&
4899 team->t.t_display_affinity != 1) {
4900 team->t.t_display_affinity = 1;
4903 while (s_count < S) {
4904 if (place == last_place) {
4905 place = first_place;
4906 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4913 if (rem && (gap_ct == gap)) {
4914 if (place == last_place) {
4915 place = first_place;
4916 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4924 th->th.th_last_place = place;
4927 if (place == last_place) {
4928 place = first_place;
4929 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4936 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4937 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4938 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4939 f, th->th.th_new_place, th->th.th_first_place,
4940 th->th.th_last_place, __kmp_affinity_num_masks));
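// The remaining spread sub-case splits the place partition arithmetically:
// each of the n_th threads receives a contiguous block of roughly
// (n_places + 1) / n_th places, computed with a floating-point 'spacing' so
// the blocks stay balanced, and th_first_place/th_last_place are set to the
// block boundaries.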
4946 double current =
static_cast<double>(masters_place);
4948 (
static_cast<double>(n_places + 1) /
static_cast<double>(n_th));
4953 if (update_master_only == 1)
4955 for (f = 0; f < thidx; f++) {
4956 first =
static_cast<int>(current);
4957 last =
static_cast<int>(current + spacing) - 1;
4958 KMP_DEBUG_ASSERT(last >= first);
4959 if (first >= n_places) {
4960 if (masters_place) {
4963 if (first == (masters_place + 1)) {
4964 KMP_DEBUG_ASSERT(f == n_th);
4967 if (last == masters_place) {
4968 KMP_DEBUG_ASSERT(f == (n_th - 1));
4972 KMP_DEBUG_ASSERT(f == n_th);
4977 if (last >= n_places) {
4978 last = (n_places - 1);
4983 KMP_DEBUG_ASSERT(0 <= first);
4984 KMP_DEBUG_ASSERT(n_places > first);
4985 KMP_DEBUG_ASSERT(0 <= last);
4986 KMP_DEBUG_ASSERT(n_places > last);
4987 KMP_DEBUG_ASSERT(last_place >= first_place);
4988 th = team->t.t_threads[f];
4989 KMP_DEBUG_ASSERT(th);
4990 th->th.th_first_place = first;
4991 th->th.th_new_place = place;
4992 th->th.th_last_place = last;
4993 if (__kmp_display_affinity && place != th->th.th_current_place &&
4994 team->t.t_display_affinity != 1) {
4995 team->t.t_display_affinity = 1;
4998 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
4999 "partition = [%d,%d], spacing = %.4f\n",
5000 __kmp_gtid_from_thread(team->t.t_threads[f]),
5001 team->t.t_id, f, th->th.th_new_place,
5002 th->th.th_first_place, th->th.th_last_place, spacing));
5006 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5008 int S, rem, gap, s_count;
5009 S = n_th / n_places;
5011 rem = n_th - (S * n_places);
5012 gap = rem > 0 ? n_places / rem : n_places;
5013 int place = masters_place;
5016 if (update_master_only == 1)
5018 for (f = 0; f < thidx; f++) {
5019 kmp_info_t *th = team->t.t_threads[f];
5020 KMP_DEBUG_ASSERT(th != NULL);
5022 th->th.th_first_place = place;
5023 th->th.th_last_place = place;
5024 th->th.th_new_place = place;
5025 if (__kmp_display_affinity && place != th->th.th_current_place &&
5026 team->t.t_display_affinity != 1) {
5027 team->t.t_display_affinity = 1;
5031 if ((s_count == S) && rem && (gap_ct == gap)) {
5033 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
5035 if (place == last_place) {
5036 place = first_place;
5037 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
5045 }
else if (s_count == S) {
5046 if (place == last_place) {
5047 place = first_place;
5048 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
5057 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d "
5058 "partition = [%d,%d]\n",
5059 __kmp_gtid_from_thread(team->t.t_threads[f]),
5060 team->t.t_id, f, th->th.th_new_place,
5061 th->th.th_first_place, th->th.th_last_place));
5063 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
5071 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
5079__kmp_allocate_team(kmp_root_t *root,
int new_nproc,
int max_nproc,
5081 ompt_data_t ompt_parallel_data,
5083 kmp_proc_bind_t new_proc_bind,
5084 kmp_internal_control_t *new_icvs,
5085 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5086 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
5089 int use_hot_team = !root->r.r_active;
5091 int do_place_partition = 1;
5093 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
5094 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
5095 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
5098#if KMP_NESTED_HOT_TEAMS
5099 kmp_hot_team_ptr_t *hot_teams;
5101 team = master->th.th_team;
5102 level = team->t.t_active_level;
5103 if (master->th.th_teams_microtask) {
5104 if (master->th.th_teams_size.nteams > 1 &&
5107 (microtask_t)__kmp_teams_master ||
5108 master->th.th_teams_level <
5115 if ((master->th.th_teams_size.nteams == 1 &&
5116 master->th.th_teams_level >= team->t.t_level) ||
5117 (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
5118 do_place_partition = 0;
5120 hot_teams = master->th.th_hot_teams;
5121 if (level < __kmp_hot_teams_max_level && hot_teams &&
5122 hot_teams[level].hot_team) {
5130 KMP_DEBUG_ASSERT(new_nproc == 1);
5134 if (use_hot_team && new_nproc > 1) {
5135 KMP_DEBUG_ASSERT(new_nproc <= max_nproc);
5136#if KMP_NESTED_HOT_TEAMS
5137 team = hot_teams[level].hot_team;
5139 team = root->r.r_hot_team;
5142 if (__kmp_tasking_mode != tskm_immediate_exec) {
5143 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5144 "task_team[1] = %p before reinit\n",
5145 team->t.t_task_team[0], team->t.t_task_team[1]));
5149 if (team->t.t_nproc != new_nproc &&
5150 __kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5152 int old_nthr = team->t.t_nproc;
5153 __kmp_resize_dist_barrier(team, old_nthr, new_nproc);
5158 if (do_place_partition == 0)
5159 team->t.t_proc_bind = proc_bind_default;
5163 if (team->t.t_nproc == new_nproc) {
5164 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
5167 if (team->t.t_size_changed == -1) {
5168 team->t.t_size_changed = 1;
5170 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
5174 kmp_r_sched_t new_sched = new_icvs->sched;
5176 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
5178 __kmp_reinitialize_team(team, new_icvs,
5179 root->r.r_uber_thread->th.th_ident);
5181 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
5182 team->t.t_threads[0], team));
5183 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5185#if KMP_AFFINITY_SUPPORTED
5186 if ((team->t.t_size_changed == 0) &&
5187 (team->t.t_proc_bind == new_proc_bind)) {
5188 if (new_proc_bind == proc_bind_spread) {
5189 if (do_place_partition) {
5191 __kmp_partition_places(team, 1);
5194 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: "
5195 "proc_bind = %d, partition = [%d,%d]\n",
5196 team->t.t_id, new_proc_bind, team->t.t_first_place,
5197 team->t.t_last_place));
5199 if (do_place_partition) {
5200 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5201 __kmp_partition_places(team);
5205 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5207 }
else if (team->t.t_nproc > new_nproc) {
5209 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
5212 team->t.t_size_changed = 1;
5213 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5216 __kmp_add_threads_to_team(team, new_nproc);
5218#if KMP_NESTED_HOT_TEAMS
5219 if (__kmp_hot_teams_mode == 0) {
5222 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
5223 hot_teams[level].hot_team_nth = new_nproc;
5226 for (f = new_nproc; f < team->t.t_nproc; f++) {
5227 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5228 if (__kmp_tasking_mode != tskm_immediate_exec) {
5231 team->t.t_threads[f]->th.th_task_team = NULL;
5233 __kmp_free_thread(team->t.t_threads[f]);
5234 team->t.t_threads[f] = NULL;
5236#if KMP_NESTED_HOT_TEAMS
5241 for (f = new_nproc; f < team->t.t_nproc; ++f) {
5242 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5243 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
5244 for (
int b = 0; b < bs_last_barrier; ++b) {
5245 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
5246 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5248 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
5253 team->t.t_nproc = new_nproc;
5255 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
5256 __kmp_reinitialize_team(team, new_icvs,
5257 root->r.r_uber_thread->th.th_ident);
5260 for (f = 0; f < new_nproc; ++f) {
5261 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
5266 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
5267 team->t.t_threads[0], team));
5269 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
5272 for (f = 0; f < team->t.t_nproc; f++) {
5273 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5274 team->t.t_threads[f]->th.th_team_nproc ==
5279 if (do_place_partition) {
5280 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5281#if KMP_AFFINITY_SUPPORTED
5282 __kmp_partition_places(team);
5286#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5287 kmp_affin_mask_t *old_mask;
5288 if (KMP_AFFINITY_CAPABLE()) {
5289 KMP_CPU_ALLOC(old_mask);
5294 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5296 int old_nproc = team->t.t_nproc;
5297 team->t.t_size_changed = 1;
5299#if KMP_NESTED_HOT_TEAMS
5300 int avail_threads = hot_teams[level].hot_team_nth;
5301 if (new_nproc < avail_threads)
5302 avail_threads = new_nproc;
5303 kmp_info_t **other_threads = team->t.t_threads;
5304 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5308 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5309 for (b = 0; b < bs_last_barrier; ++b) {
5310 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5311 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5313 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5317 if (hot_teams[level].hot_team_nth >= new_nproc) {
5320 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5321 team->t.t_nproc = new_nproc;
5325 team->t.t_nproc = hot_teams[level].hot_team_nth;
5326 hot_teams[level].hot_team_nth = new_nproc;
5328 if (team->t.t_max_nproc < new_nproc) {
5330 __kmp_reallocate_team_arrays(team, new_nproc);
5331 __kmp_reinitialize_team(team, new_icvs, NULL);
5334#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5340 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5344 for (f = team->t.t_nproc; f < new_nproc; f++) {
5345 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5346 KMP_DEBUG_ASSERT(new_worker);
5347 team->t.t_threads[f] = new_worker;
5350 (
"__kmp_allocate_team: team %d init T#%d arrived: "
5351 "join=%llu, plain=%llu\n",
5352 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5353 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5354 team->t.t_bar[bs_plain_barrier].b_arrived));
5358 kmp_balign_t *balign = new_worker->th.th_bar;
5359 for (b = 0; b < bs_last_barrier; ++b) {
5360 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5361 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5362 KMP_BARRIER_PARENT_FLAG);
5364 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5370#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
5371 if (KMP_AFFINITY_CAPABLE()) {
5373 __kmp_set_system_affinity(old_mask, TRUE);
5374 KMP_CPU_FREE(old_mask);
5377#if KMP_NESTED_HOT_TEAMS
5380 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5383 __kmp_add_threads_to_team(team, new_nproc);
5387 __kmp_initialize_team(team, new_nproc, new_icvs,
5388 root->r.r_uber_thread->th.th_ident);
5391 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5392 for (f = 0; f < team->t.t_nproc; ++f)
5393 __kmp_initialize_info(team->t.t_threads[f], team, f,
5394 __kmp_gtid_from_tid(f, team));
5402 for (f = old_nproc; f < team->t.t_nproc; ++f)
5403 team->t.t_threads[f]->th.th_task_state =
5404 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5407 kmp_uint8 old_state = team->t.t_threads[0]->th.th_task_state;
5408 for (f = old_nproc; f < team->t.t_nproc; ++f)
5409 team->t.t_threads[f]->th.th_task_state = old_state;
5413 for (f = 0; f < team->t.t_nproc; ++f) {
5414 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5415 team->t.t_threads[f]->th.th_team_nproc ==
5420 if (do_place_partition) {
5421 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5422#if KMP_AFFINITY_SUPPORTED
5423 __kmp_partition_places(team);
5428 kmp_info_t *master = team->t.t_threads[0];
5429 if (master->th.th_teams_microtask) {
5430 for (f = 1; f < new_nproc; ++f) {
5432 kmp_info_t *thr = team->t.t_threads[f];
5433 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5434 thr->th.th_teams_level = master->th.th_teams_level;
5435 thr->th.th_teams_size = master->th.th_teams_size;
5438#if KMP_NESTED_HOT_TEAMS
5442 for (f = 1; f < new_nproc; ++f) {
5443 kmp_info_t *thr = team->t.t_threads[f];
5445 kmp_balign_t *balign = thr->th.th_bar;
5446 for (b = 0; b < bs_last_barrier; ++b) {
5447 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5448 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5450 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5458 __kmp_alloc_argv_entries(argc, team, TRUE);
5459 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5463 KF_TRACE(10, (
" hot_team = %p\n", team));
5466 if (__kmp_tasking_mode != tskm_immediate_exec) {
5467 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p "
5468 "task_team[1] = %p after reinit\n",
5469 team->t.t_task_team[0], team->t.t_task_team[1]));
5474 __ompt_team_assign_id(team, ompt_parallel_data);
5484 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5487 if (team->t.t_max_nproc >= max_nproc) {
5489 __kmp_team_pool = team->t.t_next_pool;
5491 if (max_nproc > 1 &&
5492 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5494 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5499 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5501 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and "
5502 "task_team[1] %p to NULL\n",
5503 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5504 team->t.t_task_team[0] = NULL;
5505 team->t.t_task_team[1] = NULL;
5508 __kmp_alloc_argv_entries(argc, team, TRUE);
5509 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5512 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5513 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5516 for (b = 0; b < bs_last_barrier; ++b) {
5517 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5519 team->t.t_bar[b].b_master_arrived = 0;
5520 team->t.t_bar[b].b_team_arrived = 0;
5525 team->t.t_proc_bind = new_proc_bind;
5527 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5531 __ompt_team_assign_id(team, ompt_parallel_data);
5543 team = __kmp_reap_team(team);
5544 __kmp_team_pool = team;
5549 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5552 team->t.t_max_nproc = max_nproc;
5553 if (max_nproc > 1 &&
5554 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5556 team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);
5561 __kmp_allocate_team_arrays(team, max_nproc);
5563 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5564 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5566 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] "
5568 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5569 team->t.t_task_team[0] = NULL;
5571 team->t.t_task_team[1] = NULL;
5574 if (__kmp_storage_map) {
5575 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5579 __kmp_alloc_argv_entries(argc, team, FALSE);
5580 team->t.t_argc = argc;
5583 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5584 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5587 for (b = 0; b < bs_last_barrier; ++b) {
5588 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5590 team->t.t_bar[b].b_master_arrived = 0;
5591 team->t.t_bar[b].b_team_arrived = 0;
5596 team->t.t_proc_bind = new_proc_bind;
5599 __ompt_team_assign_id(team, ompt_parallel_data);
5600 team->t.ompt_serialized_team_info = NULL;
5605 KA_TRACE(20, (
"__kmp_allocate_team: done creating a new team %d.\n",
5616void __kmp_free_team(kmp_root_t *root,
5617 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5619 KA_TRACE(20, (
"__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5623 KMP_DEBUG_ASSERT(root);
5624 KMP_DEBUG_ASSERT(team);
5625 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5626 KMP_DEBUG_ASSERT(team->t.t_threads);
5628 int use_hot_team = team == root->r.r_hot_team;
5629#if KMP_NESTED_HOT_TEAMS
5632 level = team->t.t_active_level - 1;
5633 if (master->th.th_teams_microtask) {
5634 if (master->th.th_teams_size.nteams > 1) {
5638 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5639 master->th.th_teams_level == team->t.t_level) {
5645 kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;
5647 if (level < __kmp_hot_teams_max_level) {
5648 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5655 TCW_SYNC_PTR(team->t.t_pkfn,
5658 team->t.t_copyin_counter = 0;
5663 if (!use_hot_team) {
5664 if (__kmp_tasking_mode != tskm_immediate_exec) {
5666 for (f = 1; f < team->t.t_nproc; ++f) {
5667 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5668 kmp_info_t *th = team->t.t_threads[f];
5669 volatile kmp_uint32 *state = &th->th.th_reap_state;
5670 while (*state != KMP_SAFE_TO_REAP) {
5674 if (!__kmp_is_thread_alive(th, &ecode)) {
5675 *state = KMP_SAFE_TO_REAP;
5680 kmp_flag_64<> fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5681 if (fl.is_sleeping())
5682 fl.resume(__kmp_gtid_from_thread(th));
5689 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5690 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5691 if (task_team != NULL) {
5692 for (f = 0; f < team->t.t_nproc; ++f) {
5693 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5694 team->t.t_threads[f]->th.th_task_team = NULL;
5698 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5699 __kmp_get_gtid(), task_team, team->t.t_id));
5700#if KMP_NESTED_HOT_TEAMS
5701 __kmp_free_task_team(master, task_team);
5703 team->t.t_task_team[tt_idx] = NULL;
5709 team->t.t_parent = NULL;
5710 team->t.t_level = 0;
5711 team->t.t_active_level = 0;
5714 for (f = 1; f < team->t.t_nproc; ++f) {
5715 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5716 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5717 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),
5720 __kmp_free_thread(team->t.t_threads[f]);
5723 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5726 team->t.b->go_release();
5727 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5728 for (f = 1; f < team->t.t_nproc; ++f) {
5729 if (team->t.b->sleep[f].sleep) {
5730 __kmp_atomic_resume_64(
5731 team->t.t_threads[f]->th.th_info.ds.ds_gtid,
5732 (kmp_atomic_flag_64<> *)NULL);
5737 for (
int f = 1; f < team->t.t_nproc; ++f) {
5738 while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)
5744 for (f = 1; f < team->t.t_nproc; ++f) {
5745 team->t.t_threads[f] = NULL;
5748 if (team->t.t_max_nproc > 1 &&
5749 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
5750 distributedBarrier::deallocate(team->t.b);
5755 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5756 __kmp_team_pool = (
volatile kmp_team_t *)team;
5759 KMP_DEBUG_ASSERT(team->t.t_threads[1] &&
5760 team->t.t_threads[1]->th.th_cg_roots);
5761 if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {
5763 for (f = 1; f < team->t.t_nproc; ++f) {
5764 kmp_info_t *thr = team->t.t_threads[f];
5765 KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&
5766 thr->th.th_cg_roots->cg_root == thr);
5768 kmp_cg_root_t *tmp = thr->th.th_cg_roots;
5769 thr->th.th_cg_roots = tmp->up;
5770 KA_TRACE(100, (
"__kmp_free_team: Thread %p popping node %p and moving"
5771 " up to node %p. cg_nthreads was %d\n",
5772 thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));
5773 int i = tmp->cg_nthreads--;
5778 if (thr->th.th_cg_roots)
5779 thr->th.th_current_task->td_icvs.thread_limit =
5780 thr->th.th_cg_roots->cg_thread_limit;
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  /* free the team arrays and the team structure itself */
  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  KMP_MB();
  return next_pool;
}
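// Return a thread to the free pool: reset its barrier and task state, pop
// any accumulated cg_root nodes, and insert it into __kmp_thread_pool, which
// is kept sorted by gtid so the lowest gtids are reused first
// (__kmp_thread_pool_insert_pt caches the last insertion point to make the
// scan cheap).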
5835void __kmp_free_thread(kmp_info_t *this_th) {
5839 KA_TRACE(20, (
"__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
5840 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
5842 KMP_DEBUG_ASSERT(this_th);
5847 kmp_balign_t *balign = this_th->th.th_bar;
5848 for (b = 0; b < bs_last_barrier; ++b) {
5849 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
5850 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
5851 balign[b].bb.team = NULL;
5852 balign[b].bb.leaf_kids = 0;
5854 this_th->th.th_task_state = 0;
5855 this_th->th.th_reap_state = KMP_SAFE_TO_REAP;
5858 TCW_PTR(this_th->th.th_team, NULL);
5859 TCW_PTR(this_th->th.th_root, NULL);
5860 TCW_PTR(this_th->th.th_dispatch, NULL);
5862 while (this_th->th.th_cg_roots) {
5863 this_th->th.th_cg_roots->cg_nthreads--;
5864 KA_TRACE(100, (
"__kmp_free_thread: Thread %p decrement cg_nthreads on node"
5865 " %p of thread %p to %d\n",
5866 this_th, this_th->th.th_cg_roots,
5867 this_th->th.th_cg_roots->cg_root,
5868 this_th->th.th_cg_roots->cg_nthreads));
5869 kmp_cg_root_t *tmp = this_th->th.th_cg_roots;
5870 if (tmp->cg_root == this_th) {
5871 KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);
5873 5, (
"__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));
5874 this_th->th.th_cg_roots = tmp->up;
5877 if (tmp->cg_nthreads == 0) {
5880 this_th->th.th_cg_roots = NULL;
5890 __kmp_free_implicit_task(this_th);
5891 this_th->th.th_current_task = NULL;
5895 gtid = this_th->th.th_info.ds.ds_gtid;
5896 if (__kmp_thread_pool_insert_pt != NULL) {
5897 KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
5898 if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
5899 __kmp_thread_pool_insert_pt = NULL;
5908 if (__kmp_thread_pool_insert_pt != NULL) {
5909 scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
5911 scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
5913 for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
5914 scan = &((*scan)->th.th_next_pool))
5919 TCW_PTR(this_th->th.th_next_pool, *scan);
5920 __kmp_thread_pool_insert_pt = *scan = this_th;
5921 KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
5922 (this_th->th.th_info.ds.ds_gtid <
5923 this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
5924 TCW_4(this_th->th.th_in_pool, TRUE);
5925 __kmp_suspend_initialize_thread(this_th);
5926 __kmp_lock_suspend_mx(this_th);
5927 if (this_th->th.th_active == TRUE) {
5928 KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
5929 this_th->th.th_active_in_pool = TRUE;
5933 KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);
5936 __kmp_unlock_suspend_mx(this_th);
5938 TCW_4(__kmp_nth, __kmp_nth - 1);
5940#ifdef KMP_ADJUST_BLOCKTIME
5943 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5944 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5945 if (__kmp_nth <= __kmp_avail_proc) {
5946 __kmp_zero_bt = FALSE;
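// Worker thread main loop: each worker waits on the fork barrier for work,
// runs the team's microtask through t_invoke, joins at the join barrier, and
// repeats until __kmp_global.g.g_done signals library shutdown.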
5956void *__kmp_launch_thread(kmp_info_t *this_thr) {
5957#if OMP_PROFILING_SUPPORT
5958 ProfileTraceFile = getenv(
"LIBOMPTARGET_PROFILE");
5960 if (ProfileTraceFile)
5961 llvm::timeTraceProfilerInitialize(500 ,
"libomptarget");
5964 int gtid = this_thr->th.th_info.ds.ds_gtid;
5966 kmp_team_t **
volatile pteam;
5969 KA_TRACE(10, (
"__kmp_launch_thread: T#%d start\n", gtid));
5971 if (__kmp_env_consistency_check) {
5972 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5976 if (ompd_state & OMPD_ENABLE_BP)
5977 ompd_bp_thread_begin();
5981 ompt_data_t *thread_data =
nullptr;
5982 if (ompt_enabled.enabled) {
5983 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5984 *thread_data = ompt_data_none;
5986 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
5987 this_thr->th.ompt_thread_info.wait_id = 0;
5988 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5989 this_thr->th.ompt_thread_info.parallel_flags = 0;
5990 if (ompt_enabled.ompt_callback_thread_begin) {
5991 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5992 ompt_thread_worker, thread_data);
5994 this_thr->th.ompt_thread_info.state = ompt_state_idle;
5999 while (!TCR_4(__kmp_global.g.g_done)) {
6000 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
6004 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
6007 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
6010 if (ompt_enabled.enabled) {
6011 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6015 pteam = &this_thr->th.th_team;
6018 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
6020 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
6023 (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6024 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6025 (*pteam)->t.t_pkfn));
6027 updateHWFPControl(*pteam);
6030 if (ompt_enabled.enabled) {
6031 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
6035 rc = (*pteam)->t.t_invoke(gtid);
6039 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6040 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
6041 (*pteam)->t.t_pkfn));
6044 if (ompt_enabled.enabled) {
6046 __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
6048 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
6052 __kmp_join_barrier(gtid);
6055 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
6058 if (ompd_state & OMPD_ENABLE_BP)
6059 ompd_bp_thread_end();
6063 if (ompt_enabled.ompt_callback_thread_end) {
6064 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
6068 this_thr->th.th_task_team = NULL;
6070 __kmp_common_destroy_gtid(gtid);
6072 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
6075#if OMP_PROFILING_SUPPORT
6076 llvm::timeTraceProfilerFinishThread();
6083void __kmp_internal_end_dest(
void *specific_gtid) {
6086 __kmp_type_convert((kmp_intptr_t)specific_gtid - 1, >id);
6088 KA_TRACE(30, (
"__kmp_internal_end_dest: T#%d\n", gtid));
6092 __kmp_internal_end_thread(gtid);
6095#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
6097__attribute__((destructor))
void __kmp_internal_end_dtor(
void) {
6098 __kmp_internal_end_atexit();
6105void __kmp_internal_end_atexit(
void) {
6106 KA_TRACE(30, (
"__kmp_internal_end_atexit\n"));
6130 __kmp_internal_end_library(-1);
6132 __kmp_close_console();
6136static void __kmp_reap_thread(kmp_info_t *thread,
int is_root) {
6141 KMP_DEBUG_ASSERT(thread != NULL);
6143 gtid = thread->th.th_info.ds.ds_gtid;
6146 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
6149 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
6151 if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
6153 !KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))
6155 __kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);
6159 kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
6161 __kmp_release_64(&flag);
6166 __kmp_reap_worker(thread);
6178 if (thread->th.th_active_in_pool) {
6179 thread->th.th_active_in_pool = FALSE;
6180 KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
6181 KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);
6185 __kmp_free_implicit_task(thread);
6189 __kmp_free_fast_memory(thread);
6192 __kmp_suspend_uninitialize_thread(thread);
6194 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
6195 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
6200#ifdef KMP_ADJUST_BLOCKTIME
6203 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6204 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6205 if (__kmp_nth <= __kmp_avail_proc) {
6206 __kmp_zero_bt = FALSE;
6212 if (__kmp_env_consistency_check) {
6213 if (thread->th.th_cons) {
6214 __kmp_free_cons_stack(thread->th.th_cons);
6215 thread->th.th_cons = NULL;
6219 if (thread->th.th_pri_common != NULL) {
6220 __kmp_free(thread->th.th_pri_common);
6221 thread->th.th_pri_common = NULL;
6224 if (thread->th.th_task_state_memo_stack != NULL) {
6225 __kmp_free(thread->th.th_task_state_memo_stack);
6226 thread->th.th_task_state_memo_stack = NULL;
6230 if (thread->th.th_local.bget_data != NULL) {
6231 __kmp_finalize_bget(thread);
6235#if KMP_AFFINITY_SUPPORTED
6236 if (thread->th.th_affin_mask != NULL) {
6237 KMP_CPU_FREE(thread->th.th_affin_mask);
6238 thread->th.th_affin_mask = NULL;
6242#if KMP_USE_HIER_SCHED
6243 if (thread->th.th_hier_bar_data != NULL) {
6244 __kmp_free(thread->th.th_hier_bar_data);
6245 thread->th.th_hier_bar_data = NULL;
6249 __kmp_reap_team(thread->th.th_serial_team);
6250 thread->th.th_serial_team = NULL;
6257static void __kmp_itthash_clean(kmp_info_t *th) {
6259 if (__kmp_itt_region_domains.count > 0) {
6260 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6261 kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];
6263 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6264 __kmp_thread_free(th, bucket);
6269 if (__kmp_itt_barrier_domains.count > 0) {
6270 for (
int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {
6271 kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];
6273 kmp_itthash_entry_t *next = bucket->next_in_bucket;
6274 __kmp_thread_free(th, bucket);
6282static void __kmp_internal_end(
void) {
6286 __kmp_unregister_library();
6293 __kmp_reclaim_dead_roots();
6297 for (i = 0; i < __kmp_threads_capacity; i++)
6299 if (__kmp_root[i]->r.r_active)
6302 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6304 if (i < __kmp_threads_capacity) {
6316 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6317 if (TCR_4(__kmp_init_monitor)) {
6318 __kmp_reap_monitor(&__kmp_monitor);
6319 TCW_4(__kmp_init_monitor, 0);
6321 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6322 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6328 for (i = 0; i < __kmp_threads_capacity; i++) {
6329 if (__kmp_root[i]) {
6332 KMP_ASSERT(!__kmp_root[i]->r.r_active);
6341 while (__kmp_thread_pool != NULL) {
6343 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
6344 __kmp_thread_pool = thread->th.th_next_pool;
6346 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
6347 thread->th.th_next_pool = NULL;
6348 thread->th.th_in_pool = FALSE;
6349 __kmp_reap_thread(thread, 0);
6351 __kmp_thread_pool_insert_pt = NULL;
6354 while (__kmp_team_pool != NULL) {
6356 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
6357 __kmp_team_pool = team->t.t_next_pool;
6359 team->t.t_next_pool = NULL;
6360 __kmp_reap_team(team);
6363 __kmp_reap_task_teams();
6370 for (i = 0; i < __kmp_threads_capacity; i++) {
6371 kmp_info_t *thr = __kmp_threads[i];
6372 while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))
6377 for (i = 0; i < __kmp_threads_capacity; ++i) {
6384 TCW_SYNC_4(__kmp_init_common, FALSE);
6386 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
6394 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
6395 if (TCR_4(__kmp_init_monitor)) {
6396 __kmp_reap_monitor(&__kmp_monitor);
6397 TCW_4(__kmp_init_monitor, 0);
6399 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
6400 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
6403 TCW_4(__kmp_init_gtid, FALSE);
6412void __kmp_internal_end_library(
int gtid_req) {
6419 if (__kmp_global.g.g_abort) {
6420 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
6424 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6425 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
6430 if (TCR_4(__kmp_init_hidden_helper) &&
6431 !TCR_4(__kmp_hidden_helper_team_done)) {
6432 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6434 __kmp_hidden_helper_main_thread_release();
6436 __kmp_hidden_helper_threads_deinitz_wait();
6442 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6444 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6445 if (gtid == KMP_GTID_SHUTDOWN) {
6446 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system "
6447 "already shutdown\n"));
6449 }
else if (gtid == KMP_GTID_MONITOR) {
6450 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not "
6451 "registered, or system shutdown\n"));
6453 }
else if (gtid == KMP_GTID_DNE) {
6454 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system "
6457 }
else if (KMP_UBER_GTID(gtid)) {
6459 if (__kmp_root[gtid]->r.r_active) {
6460 __kmp_global.g.g_abort = -1;
6461 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6462 __kmp_unregister_library();
6464 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6468 __kmp_itthash_clean(__kmp_threads[gtid]);
6471 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6472 __kmp_unregister_root_current_thread(gtid);
6479#ifdef DUMP_DEBUG_ON_EXIT
6480 if (__kmp_debug_buf)
6481 __kmp_dump_debug_buffer();
6486 __kmp_unregister_library();
6491 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6494 if (__kmp_global.g.g_abort) {
6495 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6497 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6500 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6501 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6510 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6513 __kmp_internal_end();
6515 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6516 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6518 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6520#ifdef DUMP_DEBUG_ON_EXIT
6521 if (__kmp_debug_buf)
6522 __kmp_dump_debug_buffer();
6526 __kmp_close_console();
6529 __kmp_fini_allocator();
6533void __kmp_internal_end_thread(
int gtid_req) {
6542 if (__kmp_global.g.g_abort) {
6543 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6547 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6548 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6553 if (TCR_4(__kmp_init_hidden_helper) &&
6554 !TCR_4(__kmp_hidden_helper_team_done)) {
6555 TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);
6557 __kmp_hidden_helper_main_thread_release();
6559 __kmp_hidden_helper_threads_deinitz_wait();
6566 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6568 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6569 if (gtid == KMP_GTID_SHUTDOWN) {
6570 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system "
6571 "already shutdown\n"));
6573 }
else if (gtid == KMP_GTID_MONITOR) {
6574 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not "
6575 "registered, or system shutdown\n"));
6577 }
else if (gtid == KMP_GTID_DNE) {
6578 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system "
6582 }
else if (KMP_UBER_GTID(gtid)) {
6584 if (__kmp_root[gtid]->r.r_active) {
6585 __kmp_global.g.g_abort = -1;
6586 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6588 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6592 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6594 __kmp_unregister_root_current_thread(gtid);
6598 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6601 __kmp_threads[gtid]->th.th_task_team = NULL;
6605 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6611 if (__kmp_pause_status != kmp_hard_paused)
6615 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6620 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6623 if (__kmp_global.g.g_abort) {
6624 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6626 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6629 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6630 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6641 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6643 for (i = 0; i < __kmp_threads_capacity; ++i) {
6644 if (KMP_UBER_GTID(i)) {
6647 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6648 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6649 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6656 __kmp_internal_end();
6658 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6659 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6661 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6663#ifdef DUMP_DEBUG_ON_EXIT
6664 if (__kmp_debug_buf)
6665 __kmp_dump_debug_buffer();
static long __kmp_registration_flag = 0;
// Random value used to indicate library initialization.
static char *__kmp_registration_str = NULL;
// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.

static inline char *__kmp_reg_status_name() {
// macOS* complains about the name being too long with an additional getuid().
#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),
                          (int)getuid());
#else
  return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());
#endif
} // __kmp_reg_status_name
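// Library registration. __kmp_register_library_startup publishes a value of
// the form "%p-%lx-%s" (address of __kmp_registration_flag, the flag value,
// and KMP_LIBRARY_FILE), e.g. roughly "0x7f...-cafe1234-libomp.so"
// (illustrative only), either in the environment variable named by
// __kmp_reg_status_name() or, when KMP_USE_SHM is defined, in a small POSIX
// shared-memory segment. A second copy of the runtime loaded into the same
// process finds that value, checks whether the flag address is still mapped
// and holds the advertised flag, and reports a DuplicateLibrary fatal error
// unless KMP_DUPLICATE_LIB_OK is set.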
6691void __kmp_register_library_startup(
void) {
6693 char *name = __kmp_reg_status_name();
6699#if KMP_ARCH_X86 || KMP_ARCH_X86_64
6700 __kmp_initialize_system_tick();
6702 __kmp_read_system_time(&time.dtime);
6703 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6704 __kmp_registration_str =
6705 __kmp_str_format(
"%p-%lx-%s", &__kmp_registration_flag,
6706 __kmp_registration_flag, KMP_LIBRARY_FILE);
6708 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6709 __kmp_registration_str));
6715#if defined(KMP_USE_SHM)
6716 char *shm_name = __kmp_str_format(
"/%s", name);
6717 int shm_preexist = 0;
6719 int fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0666);
6720 if ((fd1 == -1) && (errno == EEXIST)) {
6723 fd1 = shm_open(shm_name, O_RDWR, 0666);
6726 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open SHM"), KMP_ERR(0),
6732 }
else if (fd1 == -1) {
6735 __kmp_fatal(KMP_MSG(FunctionError,
"Can't open SHM2"), KMP_ERR(errno),
6738 if (shm_preexist == 0) {
6740 if (ftruncate(fd1, SHM_SIZE) == -1) {
6742 __kmp_fatal(KMP_MSG(FunctionError,
"Can't set size of SHM"),
6743 KMP_ERR(errno), __kmp_msg_null);
6747 (
char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd1, 0);
6748 if (data1 == MAP_FAILED) {
6750 __kmp_fatal(KMP_MSG(FunctionError,
"Can't map SHM"), KMP_ERR(errno),
6753 if (shm_preexist == 0) {
6754 KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);
6757 value = __kmp_str_format(
"%s", data1);
6758 munmap(data1, SHM_SIZE);
6762 __kmp_env_set(name, __kmp_registration_str, 0);
6764 value = __kmp_env_get(name);
6767 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6774 char *flag_addr_str = NULL;
6775 char *flag_val_str = NULL;
6776 char const *file_name = NULL;
6777 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6778 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6781 unsigned long *flag_addr = 0;
6782 unsigned long flag_val = 0;
6783 KMP_SSCANF(flag_addr_str,
"%p", RCAST(
void **, &flag_addr));
6784 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6785 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6789 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6803 file_name =
"unknown library";
6808 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6809 if (!__kmp_str_match_true(duplicate_ok)) {
6811 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6812 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6814 KMP_INTERNAL_FREE(duplicate_ok);
6815 __kmp_duplicate_library_ok = 1;
6820#if defined(KMP_USE_SHM)
6822 shm_unlink(shm_name);
6825 __kmp_env_unset(name);
6829 KMP_DEBUG_ASSERT(0);
6833 KMP_INTERNAL_FREE((
void *)value);
6834#if defined(KMP_USE_SHM)
6835 KMP_INTERNAL_FREE((
void *)shm_name);
6838 KMP_INTERNAL_FREE((
void *)name);
6842void __kmp_unregister_library(
void) {
6844 char *name = __kmp_reg_status_name();
6847#if defined(KMP_USE_SHM)
6848 char *shm_name = __kmp_str_format(
"/%s", name);
6849 int fd1 = shm_open(shm_name, O_RDONLY, 0666);
6854 char *data1 = (
char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);
6855 if (data1 != MAP_FAILED) {
6856 value = __kmp_str_format(
"%s", data1);
6857 munmap(data1, SHM_SIZE);
6861 value = __kmp_env_get(name);
6864 KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
6865 KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
6866 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6868#if defined(KMP_USE_SHM)
6869 shm_unlink(shm_name);
6871 __kmp_env_unset(name);
6875#if defined(KMP_USE_SHM)
6876 KMP_INTERNAL_FREE(shm_name);
6879 KMP_INTERNAL_FREE(__kmp_registration_str);
6880 KMP_INTERNAL_FREE(value);
6881 KMP_INTERNAL_FREE(name);
6883 __kmp_registration_flag = 0;
6884 __kmp_registration_str = NULL;
6891#if KMP_MIC_SUPPORTED
static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment.
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}

#endif /* KMP_MIC_SUPPORTED */
#if KMP_HAVE_UMWAIT
static void __kmp_user_level_mwait_init() {
  struct kmp_cpuid buf;
  __kmp_x86_cpuid(7, 0, &buf);
  __kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);
  __kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;
  __kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",
                __kmp_umwait_enabled));
}
#elif KMP_HAVE_MWAIT
#ifndef AT_INTELPHIUSERMWAIT
// Spurious, non-existent value that should always fail to return anything.
#define AT_INTELPHIUSERMWAIT 10000
#endif
// getauxval() is not available on older systems; provide a weak fallback
// that simply reports "not found".
unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;
unsigned long getauxval(unsigned long) { return 0; }

static void __kmp_user_level_mwait_init() {
  // Check the availability of user-level mwait on a Xeon Phi (KNL) system.
  if (__kmp_mic_type == mic3) {
    unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);
    if ((res & 0x1) || __kmp_user_level_mwait) {
      __kmp_mwait_enabled = TRUE;
      if (__kmp_user_level_mwait) {
        KMP_INFORM(EnvMwaitWarn);
      }
    } else {
      __kmp_mwait_enabled = FALSE;
    }
  }
  KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "
                "__kmp_mwait_enabled = %d\n",
                __kmp_mic_type, __kmp_mwait_enabled));
}
#endif /* KMP_HAVE_UMWAIT */
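// Serial initialization: executed once per process under __kmp_initz_lock.
// It checks basic type-size assumptions, registers the library, initializes
// the bootstrap and atomic locks, derives the default team size and
// blocktime from the machine, reads the environment, allocates the
// __kmp_threads / __kmp_root tables, and registers the initial (uber) root.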
static void __kmp_do_serial_initialize(void) {
  int i, gtid;
  size_t size;

  KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));
  KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);
  KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);
  KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));
6973 __kmp_validate_locks();
6976 __kmp_init_allocator();
6982 if (__kmp_need_register_serial)
6983 __kmp_register_library_startup();
6986 if (TCR_4(__kmp_global.g.g_done)) {
6987 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
6990 __kmp_global.g.g_abort = 0;
6991 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6994#if KMP_USE_ADAPTIVE_LOCKS
6995#if KMP_DEBUG_ADAPTIVE_LOCKS
6996 __kmp_init_speculative_stats();
6999#if KMP_STATS_ENABLED
7002 __kmp_init_lock(&__kmp_global_lock);
7003 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
7004 __kmp_init_lock(&__kmp_debug_lock);
7005 __kmp_init_atomic_lock(&__kmp_atomic_lock);
7006 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
7007 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
7008 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
7009 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
7010 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
7011 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
7012 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
7013 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
7014 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
7015 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
7016 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
7017 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
7018 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
7019 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
7021 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
7023 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
7027 __kmp_runtime_initialize();
7029#if KMP_MIC_SUPPORTED
7030 __kmp_check_mic_type();
7037 __kmp_abort_delay = 0;
7041 __kmp_dflt_team_nth_ub = __kmp_xproc;
7042 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
7043 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7045 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
7046 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7048 __kmp_max_nth = __kmp_sys_max_nth;
7049 __kmp_cg_max_nth = __kmp_sys_max_nth;
7050 __kmp_teams_max_nth = __kmp_xproc;
7051 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
7052 __kmp_teams_max_nth = __kmp_sys_max_nth;
7057 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7059 __kmp_monitor_wakeups =
7060 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7061 __kmp_bt_intervals =
7062 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
7065 __kmp_library = library_throughput;
7067 __kmp_static = kmp_sch_static_balanced;
7074#if KMP_FAST_REDUCTION_BARRIER
7075#define kmp_reduction_barrier_gather_bb ((int)1)
7076#define kmp_reduction_barrier_release_bb ((int)1)
7077#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt
7078#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt
7080 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
7081 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
7082 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
7083 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
7084 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
7085#if KMP_FAST_REDUCTION_BARRIER
7086 if (i == bs_reduction_barrier) {
7088 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
7089 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
7090 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
7091 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
7095#if KMP_FAST_REDUCTION_BARRIER
7096#undef kmp_reduction_barrier_release_pat
7097#undef kmp_reduction_barrier_gather_pat
7098#undef kmp_reduction_barrier_release_bb
7099#undef kmp_reduction_barrier_gather_bb
7101#if KMP_MIC_SUPPORTED
7102 if (__kmp_mic_type == mic2) {
7104 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
7105 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
7107 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7108 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
7110#if KMP_FAST_REDUCTION_BARRIER
7111 if (__kmp_mic_type == mic2) {
7112 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7113 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
7120 __kmp_env_checks = TRUE;
7122 __kmp_env_checks = FALSE;
7126 __kmp_foreign_tp = TRUE;
7128 __kmp_global.g.g_dynamic = FALSE;
7129 __kmp_global.g.g_dynamic_mode = dynamic_default;
7131 __kmp_init_nesting_mode();
7133 __kmp_env_initialize(NULL);
7135#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
7136 __kmp_user_level_mwait_init();
7140 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
7141 if (__kmp_str_match_true(val)) {
7142 kmp_str_buf_t buffer;
7143 __kmp_str_buf_init(&buffer);
7144 __kmp_i18n_dump_catalog(&buffer);
7145 __kmp_printf(
"%s", buffer.str);
7146 __kmp_str_buf_free(&buffer);
7148 __kmp_env_free(&val);
7151 __kmp_threads_capacity =
7152 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
7154 __kmp_tp_capacity = __kmp_default_tp_capacity(
7155 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
7160 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
7161 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
7162 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
7163 __kmp_thread_pool = NULL;
7164 __kmp_thread_pool_insert_pt = NULL;
7165 __kmp_team_pool = NULL;
  size =
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +
      CACHE_LINE;
  __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
  __kmp_root = (kmp_root_t **)((char *)__kmp_threads +
                               sizeof(kmp_info_t *) * __kmp_threads_capacity);
7179 KMP_DEBUG_ASSERT(__kmp_all_nth ==
7181 KMP_DEBUG_ASSERT(__kmp_nth == 0);
7186 gtid = __kmp_register_root(TRUE);
7187 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
7188 KMP_ASSERT(KMP_UBER_GTID(gtid));
7189 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
7193 __kmp_common_initialize();
7197 __kmp_register_atfork();
7204 int rc = atexit(__kmp_internal_end_atexit);
7206 __kmp_fatal(KMP_MSG(FunctionError,
"atexit()"), KMP_ERR(rc),
7212#if KMP_HANDLE_SIGNALS
7218 __kmp_install_signals(FALSE);
7221 __kmp_install_signals(TRUE);
7226 __kmp_init_counter++;
7228 __kmp_init_serial = TRUE;
  if (__kmp_settings) {
    __kmp_env_print();
  }
  if (__kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print_2();
  }
  KMP_MB();
  KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));
}
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
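// Middle initialization: runs after serial initialization, typically on the
// first real use of the threading API. It initializes affinity (when
// supported) and settles the default team size __kmp_dflt_team_nth from
// KMP_DFLT_NTH_CORES or the number of available processors, propagating a
// changed default to the ICVs of already-registered roots.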
7260static void __kmp_do_middle_initialize(
void) {
7262 int prev_dflt_team_nth;
7264 if (!__kmp_init_serial) {
7265 __kmp_do_serial_initialize();
7268 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
7270 if (UNLIKELY(!__kmp_need_register_serial)) {
7273 __kmp_register_library_startup();
7278 prev_dflt_team_nth = __kmp_dflt_team_nth;
7280#if KMP_AFFINITY_SUPPORTED
7283 __kmp_affinity_initialize();
7287 KMP_ASSERT(__kmp_xproc > 0);
7288 if (__kmp_avail_proc == 0) {
7289 __kmp_avail_proc = __kmp_xproc;
7295 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
7296 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
7301 if (__kmp_dflt_team_nth == 0) {
7302#ifdef KMP_DFLT_NTH_CORES
7304 __kmp_dflt_team_nth = __kmp_ncores;
7305 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7306 "__kmp_ncores (%d)\n",
7307 __kmp_dflt_team_nth));
7310 __kmp_dflt_team_nth = __kmp_avail_proc;
7311 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = "
7312 "__kmp_avail_proc(%d)\n",
7313 __kmp_dflt_team_nth));
7317 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
7318 __kmp_dflt_team_nth = KMP_MIN_NTH;
7320 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
7321 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7324 if (__kmp_nesting_mode > 0)
7325 __kmp_set_nesting_mode_threads();
7329 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
7331 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
7336 for (i = 0; i < __kmp_threads_capacity; i++) {
7337 kmp_info_t *thread = __kmp_threads[i];
7340 if (thread->th.th_current_task->td_icvs.nproc != 0)
7343 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
7348 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7349 __kmp_dflt_team_nth));
7351#ifdef KMP_ADJUST_BLOCKTIME
7353 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
7354 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
7355 if (__kmp_nth > __kmp_avail_proc) {
7356 __kmp_zero_bt = TRUE;
7362 TCW_SYNC_4(__kmp_init_middle, TRUE);
7364 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
7380void __kmp_parallel_initialize(
void) {
7381 int gtid = __kmp_entry_gtid();
7384 if (TCR_4(__kmp_init_parallel))
7386 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7387 if (TCR_4(__kmp_init_parallel)) {
7388 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7393 if (TCR_4(__kmp_global.g.g_done)) {
7396 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
7397 __kmp_infinite_loop();
7403 if (!__kmp_init_middle) {
7404 __kmp_do_middle_initialize();
7406 __kmp_assign_root_init_mask();
7407 __kmp_resume_if_hard_paused();
7410 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
7411 KMP_ASSERT(KMP_UBER_GTID(gtid));
7413#if KMP_ARCH_X86 || KMP_ARCH_X86_64
7416 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
7417 __kmp_store_mxcsr(&__kmp_init_mxcsr);
7418 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7422#if KMP_HANDLE_SIGNALS
7424 __kmp_install_signals(TRUE);
7428 __kmp_suspend_initialize();
7430#if defined(USE_LOAD_BALANCE)
7431 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7432 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7435 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
7436 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7440 if (__kmp_version) {
7441 __kmp_print_version_2();
7445 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7448 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
7450 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
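// Hidden helper initialization: lazily brings up the hidden helper team used
// for hidden-helper tasks. It is guarded by __kmp_init_hidden_helper and
// synchronizes with the helper threads through the initz wait/release calls.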
7453void __kmp_hidden_helper_initialize() {
7454 if (TCR_4(__kmp_init_hidden_helper))
7458 if (!TCR_4(__kmp_init_parallel))
7459 __kmp_parallel_initialize();
7463 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7464 if (TCR_4(__kmp_init_hidden_helper)) {
7465 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7470 KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
7474 TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);
7477 __kmp_do_initialize_hidden_helper_threads();
7480 __kmp_hidden_helper_threads_initz_wait();
7483 TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);
7485 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
7490void __kmp_run_before_invoked_task(
int gtid,
int tid, kmp_info_t *this_thr,
7492 kmp_disp_t *dispatch;
7497 this_thr->th.th_local.this_construct = 0;
7499 KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
7501 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7502 KMP_DEBUG_ASSERT(dispatch);
7503 KMP_DEBUG_ASSERT(team->t.t_dispatch);
7507 dispatch->th_disp_index = 0;
7508 dispatch->th_doacross_buf_idx = 0;
7509 if (__kmp_env_consistency_check)
7510 __kmp_push_parallel(gtid, team->t.t_ident);
7515void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                    kmp_team_t *team) {
7517 if (__kmp_env_consistency_check)
7518 __kmp_pop_parallel(gtid, team->t.t_ident);
7520 __kmp_finish_implicit_task(this_thr);
7523int __kmp_invoke_task_func(int gtid) {
7525 int tid = __kmp_tid_from_gtid(gtid);
7526 kmp_info_t *this_thr = __kmp_threads[gtid];
7527 kmp_team_t *team = this_thr->th.th_team;
7529 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
7531 if (__itt_stack_caller_create_ptr) {
7533 if (team->t.t_stack_id != NULL) {
7534 __kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);
7536 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7537 __kmp_itt_stack_callee_enter(
7538 (__itt_caller)team->t.t_parent->t.t_stack_id);
7542#if INCLUDE_SSC_MARKS
7543 SSC_MARK_INVOKING();
7548 void **exit_frame_p;
7549 ompt_data_t *my_task_data;
7550 ompt_data_t *my_parallel_data;
7553 if (ompt_enabled.enabled) {
7554 exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]
7555 .ompt_task_info.frame.exit_frame.ptr);
7557 exit_frame_p = &dummy;
7561 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
7562 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
7563 if (ompt_enabled.ompt_callback_implicit_task) {
7564 ompt_team_size = team->t.t_nproc;
7565 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7566 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
7567 __kmp_tid_from_gtid(gtid), ompt_task_implicit);
7568 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
7572#if KMP_STATS_ENABLED
7574 if (previous_state == stats_state_e::TEAMS_REGION) {
7575 KMP_PUSH_PARTITIONED_TIMER(OMP_teams);
7577 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);
7579 KMP_SET_THREAD_STATE(IMPLICIT_TASK);
7582 rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
7583                             tid, (int)team->t.t_argc, (void **)team->t.t_argv);
7590 *exit_frame_p = NULL;
7591 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_team;
7594#if KMP_STATS_ENABLED
7595 if (previous_state == stats_state_e::TEAMS_REGION) {
7596 KMP_SET_THREAD_STATE(previous_state);
7598 KMP_POP_PARTITIONED_TIMER();
7602 if (__itt_stack_caller_create_ptr) {
7604 if (team->t.t_stack_id != NULL) {
7605 __kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);
7607 KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);
7608 __kmp_itt_stack_callee_leave(
7609 (__itt_caller)team->t.t_parent->t.t_stack_id);
7613 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
7618void __kmp_teams_master(int gtid) {
7620 kmp_info_t *thr = __kmp_threads[gtid];
7621 kmp_team_t *team = thr->th.th_team;
7622 ident_t *loc = team->t.t_ident;
7623 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
7624 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
7625 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
7626 KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
7627               __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
7630 kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));
7633 tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;
7634 tmp->cg_nthreads = 1;
7635 KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"
7636                " cg_nthreads to 1\n",
                    thr, tmp));
7638 tmp->up = thr->th.th_cg_roots;
7639 thr->th.th_cg_roots = tmp;
7643#if INCLUDE_SSC_MARKS
7646 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
7647 (microtask_t)thr->th.th_teams_microtask,
7648 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
7649#if INCLUDE_SSC_MARKS
7653 if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)
7654 thr->th.th_teams_size.nth = thr->th.th_team_nproc;
7657 __kmp_join_call(loc, gtid
7666int __kmp_invoke_teams_master(int gtid) {
7667 kmp_info_t *this_thr = __kmp_threads[gtid];
7668 kmp_team_t *team = this_thr->th.th_team;
7670 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7671   KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7672                    (void *)__kmp_teams_master);
7674 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7676 int tid = __kmp_tid_from_gtid(gtid);
7677 ompt_data_t *task_data =
7678 &team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;
7679 ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;
7680 if (ompt_enabled.ompt_callback_implicit_task) {
7681 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7682 ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,
7684 OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;
7687 __kmp_teams_master(gtid);
7689 this_thr->th.ompt_thread_info.parallel_flags |= ompt_parallel_league;
7691 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
7700void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
7701 kmp_info_t *thr = __kmp_threads[gtid];
7703 if (num_threads > 0)
7704 thr->th.th_set_nproc = num_threads;
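// Illustrative only. The num_threads clause below is what ultimately reaches
// __kmp_push_num_threads (via the compiler-emitted __kmpc_push_num_threads
// entry point before the fork); a sketch of user code, not runtime code.
#if 0
#include <omp.h>
#include <stdio.h>
void num_threads_sketch(void) {
#pragma omp parallel num_threads(4) // request nproc = 4 for this region only
  {
#pragma omp single
    printf("team size: %d\n", omp_get_num_threads());
  }
}
#endif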
7707static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                      int num_threads) {
7709 KMP_DEBUG_ASSERT(thr);
7711 if (!TCR_4(__kmp_init_middle))
7712 __kmp_middle_initialize();
7713 __kmp_assign_root_init_mask();
7714 KMP_DEBUG_ASSERT(__kmp_avail_proc);
7715 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
7717 if (num_threads == 0) {
7718 if (__kmp_teams_thread_limit > 0) {
7719 num_threads = __kmp_teams_thread_limit;
7721 num_threads = __kmp_avail_proc / num_teams;
7726 if (num_threads > __kmp_dflt_team_nth) {
7727 num_threads = __kmp_dflt_team_nth;
7729 if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {
7730 num_threads = thr->th.th_current_task->td_icvs.thread_limit;
7732 if (num_teams * num_threads > __kmp_teams_max_nth) {
7733 num_threads = __kmp_teams_max_nth / num_teams;
7735 if (num_threads == 0) {
7739 if (num_threads < 0) {
7740 __kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),
7746 thr->th.th_current_task->td_icvs.thread_limit = num_threads;
7748 if (num_threads > __kmp_dflt_team_nth) {
7749 num_threads = __kmp_dflt_team_nth;
7751 if (num_teams * num_threads > __kmp_teams_max_nth) {
7752 int new_threads = __kmp_teams_max_nth / num_teams;
7753 if (new_threads == 0) {
7756 if (new_threads != num_threads) {
7757 if (!__kmp_reserve_warn) {
7758 __kmp_reserve_warn = 1;
7759 __kmp_msg(kmp_ms_warning,
7760 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7761 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7764 num_threads = new_threads;
7767 thr->th.th_teams_size.nth = num_threads;
7772void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
                            int num_threads) {
7774 kmp_info_t *thr = __kmp_threads[gtid];
7775 if (num_teams < 0) {
7778 __kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),
7782 if (num_teams == 0) {
7783 if (__kmp_nteams > 0) {
7784 num_teams = __kmp_nteams;
7789 if (num_teams > __kmp_teams_max_nth) {
7790 if (!__kmp_reserve_warn) {
7791 __kmp_reserve_warn = 1;
7792 __kmp_msg(kmp_ms_warning,
7793 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7794 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7796 num_teams = __kmp_teams_max_nth;
7800 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7802 __kmp_push_thread_limit(thr, num_teams, num_threads);
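// Illustrative only. A teams construct with explicit num_teams/thread_limit
// clauses; the clause values are forwarded to __kmp_push_num_teams and
// __kmp_push_thread_limit before the teams fork (sketch of user code).
#if 0
#include <omp.h>
#include <stdio.h>
void teams_sketch(void) {
#pragma omp teams num_teams(4) thread_limit(8)
  {
    if (omp_get_team_num() == 0)
      printf("league of %d teams\n", omp_get_num_teams());
  }
}
#endif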
7807void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
7808                             int num_teams_ub, int num_threads) {
7809 kmp_info_t *thr = __kmp_threads[gtid];
7810 KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
7811 KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
7812 KMP_DEBUG_ASSERT(num_threads >= 0);
7814 if (num_teams_lb > num_teams_ub) {
7815 __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
7816 KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
7821 if (num_teams_lb == 0 && num_teams_ub > 0)
7822 num_teams_lb = num_teams_ub;
7824 if (num_teams_lb == 0 && num_teams_ub == 0) {
7825 num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
7826 if (num_teams > __kmp_teams_max_nth) {
7827 if (!__kmp_reserve_warn) {
7828 __kmp_reserve_warn = 1;
7829 __kmp_msg(kmp_ms_warning,
7830 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7831 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7833 num_teams = __kmp_teams_max_nth;
7835 } else if (num_teams_lb == num_teams_ub) {
7836   num_teams = num_teams_ub;
7838 if (num_threads <= 0) {
7839 if (num_teams_ub > __kmp_teams_max_nth) {
7840 num_teams = num_teams_lb;
7842 num_teams = num_teams_ub;
7845     num_teams = (num_threads > __kmp_teams_max_nth)
                         ? num_teams_ub
7847                     : __kmp_teams_max_nth / num_threads;
7848     if (num_teams < num_teams_lb) {
7849       num_teams = num_teams_lb;
7850     } else if (num_teams > num_teams_ub) {
7851       num_teams = num_teams_ub;
7857 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7859 __kmp_push_thread_limit(thr, num_teams, num_threads);
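// Illustrative only. OpenMP 5.1 allows a lower:upper range on num_teams; the
// bounds reach __kmp_push_num_teams_51, which picks a team count within
// [lb, ub] subject to __kmp_teams_max_nth and the thread limit. Sketch under
// the assumption that the compiler in use supports the 5.1 range syntax.
#if 0
#include <omp.h>
#include <stdio.h>
void teams_range_sketch(void) {
#pragma omp teams num_teams(2 : 8) thread_limit(4)
  {
    if (omp_get_team_num() == 0)
      printf("runtime chose %d teams\n", omp_get_num_teams());
  }
}
#endif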
7863void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
7864 kmp_info_t *thr = __kmp_threads[gtid];
7865 thr->th.th_set_proc_bind = proc_bind;
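// Illustrative only. The proc_bind clause value stored by __kmp_push_proc_bind
// controls how the next team is partitioned over places; a sketch of user
// code, typically combined with a places setting such as OMP_PLACES=cores.
#if 0
#include <omp.h>
#include <stdio.h>
void proc_bind_sketch(void) {
#pragma omp parallel num_threads(4) proc_bind(spread)
  printf("thread %d bound per the spread policy\n", omp_get_thread_num());
}
#endif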
7870void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {
7871 kmp_info_t *this_thr = __kmp_threads[gtid];
7877 KMP_DEBUG_ASSERT(team);
7878 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7879 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7882 team->t.t_construct = 0;
7883 team->t.t_ordered.dt.t_value = 0;
7887 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7888 if (team->t.t_max_nproc > 1) {
7890 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7891 team->t.t_disp_buffer[i].buffer_index = i;
7892 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7895 team->t.t_disp_buffer[0].buffer_index = 0;
7896 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7900 KMP_ASSERT(this_thr->th.th_team == team);
7903 for (f = 0; f < team->t.t_nproc; f++) {
7904 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7905 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7910 __kmp_fork_barrier(gtid, 0);
7913void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
7914 kmp_info_t *this_thr = __kmp_threads[gtid];
7916 KMP_DEBUG_ASSERT(team);
7917 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7918 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7924 if (__kmp_threads[gtid] &&
7925 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7926   __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7927                __kmp_threads[gtid]);
7928   __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "
7929                "team->t.t_nproc=%d\n",
7930                gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
                    team->t.t_nproc);
7932 __kmp_print_structure();
7934 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7935 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
7938 __kmp_join_barrier(gtid);
7940 if (ompt_enabled.enabled &&
7941 this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
7942 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7943 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
7944 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
7946 void *codeptr = NULL;
7947 if (KMP_MASTER_TID(ds_tid) &&
7948 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7949 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7950 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7952 if (ompt_enabled.ompt_callback_sync_region_wait) {
7953 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
7954 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7957 if (ompt_enabled.ompt_callback_sync_region) {
7958 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
7959 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, task_data,
7963 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7964 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7965 ompt_scope_end, NULL, task_data, 0, ds_tid,
7966 ompt_task_implicit);
7972 KMP_ASSERT(this_thr->th.th_team == team);
7977#ifdef USE_LOAD_BALANCE
7981static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7984 kmp_team_t *hot_team;
7986 if (root->r.r_active) {
7989 hot_team = root->r.r_hot_team;
7990 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7991 return hot_team->t.t_nproc - 1;
7996 for (i = 1; i < hot_team->t.t_nproc; i++) {
7997 if (hot_team->t.t_threads[i]->th.th_active) {
8006static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {
8009 int hot_team_active;
8010 int team_curr_active;
8013 KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
                   set_nproc));
8015 KMP_DEBUG_ASSERT(root);
8016 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
8017 ->th.th_current_task->td_icvs.dynamic == TRUE);
8018 KMP_DEBUG_ASSERT(set_nproc > 1);
8020 if (set_nproc == 1) {
8021   KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));
8030 pool_active = __kmp_thread_pool_active_nth;
8031 hot_team_active = __kmp_active_hot_team_nproc(root);
8032 team_curr_active = pool_active + hot_team_active + 1;
8035 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
8036 KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "
8037               "hot team active = %d\n",
8038               system_active, pool_active, hot_team_active));
8040 if (system_active < 0) {
8044 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8045   KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");
8048 retval = __kmp_avail_proc - __kmp_nth +
8049 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
8050 if (retval > set_nproc) {
8053 if (retval < KMP_MIN_NTH) {
8054 retval = KMP_MIN_NTH;
8057   KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
                     retval));
8065 if (system_active < team_curr_active) {
8066 system_active = team_curr_active;
8068 retval = __kmp_avail_proc - system_active + team_curr_active;
8069 if (retval > set_nproc) {
8072 if (retval < KMP_MIN_NTH) {
8073 retval = KMP_MIN_NTH;
8076 KB_TRACE(20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval));
8085void __kmp_cleanup(void) {
8088 KA_TRACE(10, ("__kmp_cleanup: enter\n"));
8090 if (TCR_4(__kmp_init_parallel)) {
8091#if KMP_HANDLE_SIGNALS
8092 __kmp_remove_signals();
8094 TCW_4(__kmp_init_parallel, FALSE);
8097 if (TCR_4(__kmp_init_middle)) {
8098#if KMP_AFFINITY_SUPPORTED
8099 __kmp_affinity_uninitialize();
8101 __kmp_cleanup_hierarchy();
8102 TCW_4(__kmp_init_middle, FALSE);
8105 KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));
8107 if (__kmp_init_serial) {
8108 __kmp_runtime_destroy();
8109 __kmp_init_serial = FALSE;
8112 __kmp_cleanup_threadprivate_caches();
8114 for (f = 0; f < __kmp_threads_capacity; f++) {
8115 if (__kmp_root[f] != NULL) {
8116 __kmp_free(__kmp_root[f]);
8117 __kmp_root[f] = NULL;
8120 __kmp_free(__kmp_threads);
8123 __kmp_threads = NULL;
8125 __kmp_threads_capacity = 0;
8128 kmp_old_threads_list_t *ptr = __kmp_old_threads_list;
8130 kmp_old_threads_list_t *next = ptr->next;
8131 __kmp_free(ptr->threads);
8136#if KMP_USE_DYNAMIC_LOCK
8137 __kmp_cleanup_indirect_user_locks();
8139 __kmp_cleanup_user_locks();
8143 __kmp_free(ompd_env_block);
8144 ompd_env_block = NULL;
8145 ompd_env_block_size = 0;
8149#if KMP_AFFINITY_SUPPORTED
8150 KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));
8151 __kmp_cpuinfo_file = NULL;
8154#if KMP_USE_ADAPTIVE_LOCKS
8155#if KMP_DEBUG_ADAPTIVE_LOCKS
8156 __kmp_print_speculative_stats();
8159 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
8160 __kmp_nested_nth.nth = NULL;
8161 __kmp_nested_nth.size = 0;
8162 __kmp_nested_nth.used = 0;
8163 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
8164 __kmp_nested_proc_bind.bind_types = NULL;
8165 __kmp_nested_proc_bind.size = 0;
8166 __kmp_nested_proc_bind.used = 0;
8167 if (__kmp_affinity_format) {
8168 KMP_INTERNAL_FREE(__kmp_affinity_format);
8169 __kmp_affinity_format = NULL;
8172 __kmp_i18n_catclose();
8174#if KMP_USE_HIER_SCHED
8175 __kmp_hier_scheds.deallocate();
8178#if KMP_STATS_ENABLED
8182 KA_TRACE(10, ("__kmp_cleanup: exit\n"));
8187int __kmp_ignore_mppbeg(void) {
8190 if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
8191 if (__kmp_str_match_false(env))
8191 if (__kmp_str_match_false(env))
8198int __kmp_ignore_mppend(void) {
8201 if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
8202 if (__kmp_str_match_false(env))
8202 if (__kmp_str_match_false(env))
8209void __kmp_internal_begin(void) {
8215 gtid = __kmp_entry_gtid();
8216 root = __kmp_threads[gtid]->th.th_root;
8217 KMP_ASSERT(KMP_UBER_GTID(gtid));
8219 if (root->r.r_begin)
8221 __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
8222 if (root->r.r_begin) {
8223 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8227 root->r.r_begin = TRUE;
8229 __kmp_release_lock(&root->r.r_begin_lock, gtid);
8234void __kmp_user_set_library(enum library_type arg) {
8241 gtid = __kmp_entry_gtid();
8242 thread = __kmp_threads[gtid];
8244 root = thread->th.th_root;
8246 KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                   library_serial));
8248 if (root->r.r_in_parallel) {
8250 KMP_WARNING(SetLibraryIncorrectCall);
8255 case library_serial:
8256 thread->th.th_set_nproc = 0;
8257 set__nproc(thread, 1);
8259 case library_turnaround:
8260 thread->th.th_set_nproc = 0;
8261 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8262 : __kmp_dflt_team_nth_ub);
8264 case library_throughput:
8265 thread->th.th_set_nproc = 0;
8266 set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
8267 : __kmp_dflt_team_nth_ub);
8270 KMP_FATAL(UnknownLibraryType, arg);
8273 __kmp_aux_set_library(arg);
8276void __kmp_aux_set_stacksize(size_t arg) {
8277 if (!__kmp_init_serial)
8278 __kmp_serial_initialize();
8281 if (arg & (0x1000 - 1)) {
8282 arg &= ~(0x1000 - 1);
8287 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8290 if (!TCR_4(__kmp_init_parallel)) {
8293 if (value < __kmp_sys_min_stksize)
8294 value = __kmp_sys_min_stksize;
8295 else if (value > KMP_MAX_STKSIZE)
8296 value = KMP_MAX_STKSIZE;
8298 __kmp_stksize = value;
8300 __kmp_env_stksize = TRUE;
8303 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
8308void __kmp_aux_set_library(enum library_type arg) {
8309 __kmp_library = arg;
8311 switch (__kmp_library) {
8312 case library_serial: {
8313 KMP_INFORM(LibraryIsSerial);
8315 case library_turnaround:
8316 if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
8317 __kmp_use_yield = 2;
8319 case library_throughput:
8320 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
8321 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
8324 KMP_FATAL(UnknownLibraryType, arg);
8330static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
8331 kmp_info_t *thr = __kmp_entry_thread();
8332 teams_serialized = 0;
8333 if (thr->th.th_teams_microtask) {
8334 kmp_team_t *team = thr->th.th_team;
8335 int tlevel = thr->th.th_teams_level;
8336 int ii = team->t.t_level;
8337 teams_serialized = team->t.t_serialized;
8338 int level = tlevel + 1;
8339 KMP_DEBUG_ASSERT(ii >= tlevel);
8340 while (ii > level) {
8341 for (teams_serialized = team->t.t_serialized;
8342 (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
8344 if (team->t.t_serialized && (!teams_serialized)) {
8345 team = team->t.t_parent;
8349 team = team->t.t_parent;
8358int __kmp_aux_get_team_num() {
8360 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8362 if (serialized > 1) {
8365 return team->t.t_master_tid;
8371int __kmp_aux_get_num_teams() {
8373 kmp_team_t *team = __kmp_aux_get_team_info(serialized);
8375 if (serialized > 1) {
8378 return team->t.t_parent->t.t_nproc;
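// Illustrative only. omp_get_team_num()/omp_get_num_teams() are the user-level
// queries that funnel into __kmp_aux_get_team_num()/__kmp_aux_get_num_teams();
// a sketch of the outside-teams vs. inside-teams cases they distinguish.
#if 0
#include <omp.h>
#include <stdio.h>
void team_query_sketch(void) {
  printf("outside teams: %d of %d\n", omp_get_team_num(), omp_get_num_teams());
#pragma omp teams num_teams(3)
  printf("inside teams:  %d of %d\n", omp_get_team_num(), omp_get_num_teams());
}
#endif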
8417typedef struct kmp_affinity_format_field_t {
  char short_name;        // format specifier character, e.g. 'n'
8419  const char *long_name; // long name used with %{...}, e.g. "thread_num"
  char field_format;      // printf conversion used for the field, e.g. 'd'
8422} kmp_affinity_format_field_t;
8424static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
8425#if KMP_AFFINITY_SUPPORTED
8426    {'A', "thread_affinity", 's'},
#endif
8428    {'t', "team_num", 'd'},
8429    {'T', "num_teams", 'd'},
8430    {'L', "nesting_level", 'd'},
8431    {'n', "thread_num", 'd'},
8432    {'N', "num_threads", 'd'},
8433    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
8435    {'P', "process_id", 'd'},
8436    {'i', "native_thread_id", 'd'}};
8439static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                              const char **ptr,
8441                                            kmp_str_buf_t *field_buffer) {
8442 int rc, format_index, field_value;
8443 const char *width_left, *width_right;
8444 bool pad_zeros, right_justify, parse_long_name, found_valid_name;
8445 static const int FORMAT_SIZE = 20;
8446 char format[FORMAT_SIZE] = {0};
8447 char absolute_short_name = 0;
8449 KMP_DEBUG_ASSERT(gtid >= 0);
8450 KMP_DEBUG_ASSERT(th);
8451 KMP_DEBUG_ASSERT(**ptr == '%');
8452 KMP_DEBUG_ASSERT(field_buffer);
8454 __kmp_str_buf_clear(field_buffer);
8461 __kmp_str_buf_cat(field_buffer, "%", 1);
8472 right_justify = false;
8474   right_justify = true;
8478 width_left = width_right = NULL;
8479 if (**ptr >= '0' && **ptr <= '9') {
8487 format[format_index++] = '%';
8489   format[format_index++] = '-';
8491   format[format_index++] = '0';
8492 if (width_left && width_right) {
8496 while (i < 8 && width_left < width_right) {
8497 format[format_index++] = *width_left;
8505 found_valid_name = false;
8506 parse_long_name = (**ptr == '{');
8507 if (parse_long_name)
8509 for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
8510                            sizeof(__kmp_affinity_format_table[0]);
      ++i) {
8512 char short_name = __kmp_affinity_format_table[i].short_name;
8513 const char *long_name = __kmp_affinity_format_table[i].long_name;
8514 char field_format = __kmp_affinity_format_table[i].field_format;
8515 if (parse_long_name) {
8516 size_t length = KMP_STRLEN(long_name);
8517 if (strncmp(*ptr, long_name, length) == 0) {
8518       found_valid_name = true;
8521   } else if (**ptr == short_name) {
8522     found_valid_name = true;
8525 if (found_valid_name) {
8526 format[format_index++] = field_format;
8527   format[format_index++] = '\0';
8528 absolute_short_name = short_name;
8532 if (parse_long_name) {
8534 absolute_short_name = 0;
8542 switch (absolute_short_name) {
8544 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
8547 rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
8550 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
8553 rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
8556 static const int BUFFER_SIZE = 256;
8557 char buf[BUFFER_SIZE];
8558 __kmp_expand_host_name(buf, BUFFER_SIZE);
8559 rc = __kmp_str_buf_print(field_buffer, format, buf);
8562 rc = __kmp_str_buf_print(field_buffer, format, getpid());
8565 rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
8568 rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
     field_value =
8572     __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
8573 rc = __kmp_str_buf_print(field_buffer, format, field_value);
8575#if KMP_AFFINITY_SUPPORTED
8578 __kmp_str_buf_init(&buf);
8579 __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
8580 rc = __kmp_str_buf_print(field_buffer, format, buf.str);
8581 __kmp_str_buf_free(&buf);
8587   rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
8589 if (parse_long_name) {
8598 KMP_ASSERT(format_index <= FORMAT_SIZE);
8608size_t __kmp_aux_capture_affinity(int gtid, const char *format,
8609                                  kmp_str_buf_t *buffer) {
8610 const char *parse_ptr;
8612 const kmp_info_t *th;
8613 kmp_str_buf_t field;
8615 KMP_DEBUG_ASSERT(buffer);
8616 KMP_DEBUG_ASSERT(gtid >= 0);
8618 __kmp_str_buf_init(&field);
8619 __kmp_str_buf_clear(buffer);
8621 th = __kmp_threads[gtid];
8627 if (parse_ptr == NULL || *parse_ptr == '\0') {
8628   parse_ptr = __kmp_affinity_format;
8630 KMP_DEBUG_ASSERT(parse_ptr);
8632 while (*parse_ptr != '\0') {
8634   if (*parse_ptr == '%') {
8636 int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
8637 __kmp_str_buf_catbuf(buffer, &field);
8641 __kmp_str_buf_cat(buffer, parse_ptr, 1);
8646 __kmp_str_buf_free(&field);
8651void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
8653 __kmp_str_buf_init(&buf);
8654 __kmp_aux_capture_affinity(gtid, format, &buf);
8655 __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
8656 __kmp_str_buf_free(&buf);
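// Illustrative only. The long field names in __kmp_affinity_format_table are
// what %{...} specifiers in an affinity format string resolve to; a hedged
// sketch using the OpenMP 5.0 affinity-format API.
#if 0
#include <omp.h>
#include <stddef.h>
void affinity_format_sketch(void) {
  omp_set_affinity_format("T#%{thread_num}/%{num_threads} "
                          "affinity: %{thread_affinity}");
#pragma omp parallel num_threads(2)
  omp_display_affinity(NULL); // NULL means: use the format set above
}
#endif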
8661void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
8662 int blocktime = arg;
8668 __kmp_save_internal_controls(thread);
8671 if (blocktime < KMP_MIN_BLOCKTIME)
8672 blocktime = KMP_MIN_BLOCKTIME;
8673 else if (blocktime > KMP_MAX_BLOCKTIME)
8674 blocktime = KMP_MAX_BLOCKTIME;
8676 set__blocktime_team(thread->th.th_team, tid, blocktime);
8677 set__blocktime_team(thread->th.th_serial_team, 0, blocktime);
8681 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8683 set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
8684 set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
8690 set__bt_set_team(thread->th.th_team, tid, bt_set);
8691 set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
8693 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
8694               "bt_intervals=%d, monitor_updates=%d\n",
8695               __kmp_gtid_from_tid(tid, thread->th.th_team),
8696               thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
8697               __kmp_monitor_wakeups));
8699 KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
8700               __kmp_gtid_from_tid(tid, thread->th.th_team),
8701               thread->th.th_team->t.t_id, tid, blocktime));
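// Illustrative only. kmp_set_blocktime() is the user-visible extension that
// reaches __kmp_aux_set_blocktime() for the calling thread; hedged sketch,
// assuming this runtime's omp.h declares the kmp_* extension entry points.
#if 0
#include <omp.h>
void blocktime_sketch(void) {
  kmp_set_blocktime(0); // park workers right after each region instead of spinning
#pragma omp parallel
  { /* ... */ }
}
#endif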
8705void __kmp_aux_set_defaults(char const *str, size_t len) {
8706 if (!__kmp_init_serial) {
8707 __kmp_serial_initialize();
8709 __kmp_env_initialize(str);
8711 if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
8719PACKED_REDUCTION_METHOD_T
8720__kmp_determine_reduction_method(
8721    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
8722    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
8723    kmp_critical_name *lck) {
8734 PACKED_REDUCTION_METHOD_T retval;
8738 KMP_DEBUG_ASSERT(loc);
8739 KMP_DEBUG_ASSERT(lck);
8741#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                \
  (loc &&                                                                      \
8743   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
8744#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))
8746 retval = critical_reduce_block;
8749 team_size = __kmp_get_team_num_threads(global_tid);
8750 if (team_size == 1) {
8752 retval = empty_reduce_block;
8756 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8758#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
8759 KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64
8761#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
8762 KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
8764 int teamsize_cutoff = 4;
8766#if KMP_MIC_SUPPORTED
8767 if (__kmp_mic_type != non_mic) {
8768 teamsize_cutoff = 8;
8771 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8772 if (tree_available) {
8773 if (team_size <= teamsize_cutoff) {
8774 if (atomic_available) {
8775 retval = atomic_reduce_block;
8778 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8780 }
else if (atomic_available) {
8781 retval = atomic_reduce_block;
8784#error "Unknown or unsupported OS"
8788#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
8790#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_HURD
8794 if (atomic_available) {
8795 if (num_vars <= 2) {
8796 retval = atomic_reduce_block;
8802 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8803 if (atomic_available && (num_vars <= 3)) {
8804 retval = atomic_reduce_block;
8805 }
else if (tree_available) {
8806     if ((reduce_size > (9 * sizeof(kmp_real64))) &&
8807         (reduce_size < (2000 * sizeof(kmp_real64)))) {
8808 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8813#error "Unknown or unsupported OS"
8817#error "Unknown or unsupported architecture"
8825 if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {
8828 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
8830 int atomic_available, tree_available;
8832 switch ((forced_retval = __kmp_force_reduction_method)) {
8833 case critical_reduce_block:
8837 case atomic_reduce_block:
8838 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8839 if (!atomic_available) {
8840       KMP_WARNING(RedMethodNotSupported, "atomic");
8841 forced_retval = critical_reduce_block;
8845 case tree_reduce_block:
8846 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8847 if (!tree_available) {
8848       KMP_WARNING(RedMethodNotSupported, "tree");
8849 forced_retval = critical_reduce_block;
8851#if KMP_FAST_REDUCTION_BARRIER
8852 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8861 retval = forced_retval;
8864 KA_TRACE(10, ("reduction method selected=%08x\n", retval));
8866#undef FAST_REDUCTION_TREE_METHOD_GENERATED
8867#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
8872kmp_int32 __kmp_get_reduce_method(void) {
8873 return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
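// Illustrative only. A reduction like the one below is what the selection
// logic above serves: the compiler-emitted reduce entry points carry
// reduce_data/reduce_func and (when legal) the KMP_IDENT_ATOMIC_REDUCE flag,
// and the runtime then picks the critical, atomic, or tree scheme as computed
// in __kmp_determine_reduction_method. Sketch of user code only.
#if 0
int reduction_sketch(const int *v, int n) {
  int sum = 0;
#pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < n; ++i)
    sum += v[i];
  return sum;
}
#endif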
8878void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
8882void __kmp_hard_pause() {
8883 __kmp_pause_status = kmp_hard_paused;
8884 __kmp_internal_end_thread(-1);
8888void __kmp_resume_if_soft_paused() {
8889 if (__kmp_pause_status == kmp_soft_paused) {
8890 __kmp_pause_status = kmp_not_paused;
8892   for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
8893 kmp_info_t *thread = __kmp_threads[gtid];
8895 kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
8897 if (fl.is_sleeping())
8899 else if (__kmp_try_suspend_mx(thread)) {
8900 __kmp_unlock_suspend_mx(thread);
8903 if (fl.is_sleeping()) {
8906       } else if (__kmp_try_suspend_mx(thread)) {
8907         __kmp_unlock_suspend_mx(thread);
8919int __kmp_pause_resource(kmp_pause_status_t level) {
8920 if (level == kmp_not_paused) {
8921 if (__kmp_pause_status == kmp_not_paused) {
8925 KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
8926 __kmp_pause_status == kmp_hard_paused);
8927 __kmp_pause_status = kmp_not_paused;
8930 } else if (level == kmp_soft_paused) {
8931   if (__kmp_pause_status != kmp_not_paused) {
8938 } else if (level == kmp_hard_paused) {
8939 if (__kmp_pause_status != kmp_not_paused) {
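// Illustrative only. __kmp_pause_resource backs the OpenMP 5.0 pause API; a
// hedged sketch that soft-pauses the runtime between two parallel phases so
// worker threads release their resources instead of spinning.
#if 0
#include <omp.h>
void pause_sketch(void) {
#pragma omp parallel
  { /* phase 1 */ }
  omp_pause_resource_all(omp_pause_soft); // returns 0 on success
#pragma omp parallel
  { /* phase 2: the runtime re-initializes transparently */ }
}
#endif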
8952void __kmp_omp_display_env(int verbose) {
8953 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
8954 if (__kmp_init_serial == 0)
8955 __kmp_do_serial_initialize();
8956 __kmp_display_env_impl(!verbose, verbose);
8957 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
8961void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                                 int new_nthreads) {
8963 KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                      bp_dist_bar);
8965 kmp_info_t **other_threads = team->t.t_threads;
8969 for (int f = 1; f < old_nthreads; ++f) {
8970 KMP_DEBUG_ASSERT(other_threads[f] != NULL);
8972 if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
8978 if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
8979 while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
8983 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
8985 team->t.t_threads[f]->th.th_used_in_team.store(2);
8986 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
8989 team->t.b->go_release();
8995 int count = old_nthreads - 1;
8997 count = old_nthreads - 1;
8998 for (int f = 1; f < old_nthreads; ++f) {
8999 if (other_threads[f]->th.th_used_in_team.load() != 0) {
9000 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9001 kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
9002 void *, other_threads[f]->th.th_sleep_loc);
9003 __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
9006 KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
9012 team->t.b->update_num_threads(new_nthreads);
9013 team->t.b->go_reset();
9016void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
9018 KMP_DEBUG_ASSERT(team);
9024 for (int f = 1; f < new_nthreads; ++f) {
9025 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
9026 KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
9028 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
9029 __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
9030 (kmp_flag_32<false, false> *)NULL);
9036 int count = new_nthreads - 1;
9038 count = new_nthreads - 1;
9039 for (int f = 1; f < new_nthreads; ++f) {
9040 if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
9048kmp_info_t **__kmp_hidden_helper_threads;
9049kmp_info_t *__kmp_hidden_helper_main_thread;
9050std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
9052kmp_int32 __kmp_hidden_helper_threads_num = 8;
9053kmp_int32 __kmp_enable_hidden_helper = TRUE;
9055kmp_int32 __kmp_hidden_helper_threads_num = 0;
9056kmp_int32 __kmp_enable_hidden_helper = FALSE;
9060std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;
9062void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
9067 KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
9068 while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
9069 __kmp_hidden_helper_threads_num)
9075 TCW_4(__kmp_init_hidden_helper_threads, FALSE);
9076 __kmp_hidden_helper_initz_release();
9077 __kmp_hidden_helper_main_thread_wait();
9079 for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
9080 __kmp_hidden_helper_worker_thread_signal();
9086void __kmp_hidden_helper_threads_initz_routine() {
9088 const int gtid = __kmp_register_root(TRUE);
9089 __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
9090 __kmp_hidden_helper_threads = &__kmp_threads[gtid];
9091 __kmp_hidden_helper_main_thread->th.th_set_nproc =
9092 __kmp_hidden_helper_threads_num;
9094 KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);
9099 TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);
9101 __kmp_hidden_helper_threads_deinitz_release();
9121void __kmp_init_nesting_mode() {
9122 int levels = KMP_HW_LAST;
9123 __kmp_nesting_mode_nlevels = levels;
9124 __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
9125 for (int i = 0; i < levels; ++i)
9126   __kmp_nesting_nth_level[i] = 0;
9127 if (__kmp_nested_nth.size < levels) {
9128   __kmp_nested_nth.nth =
9129       (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
9130 __kmp_nested_nth.size = levels;
9135void __kmp_set_nesting_mode_threads() {
9136 kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
9138 if (__kmp_nesting_mode == 1)
9139 __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
9140 else if (__kmp_nesting_mode > 1)
9141 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9143 if (__kmp_topology) {
9145 for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
9146 loc < __kmp_nesting_mode_nlevels;
9147 loc++, hw_level++) {
9148 __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
9149 if (__kmp_nesting_nth_level[loc] == 1)
9153 if (__kmp_nesting_mode > 1 && loc > 1) {
9154 int core_level = __kmp_topology->get_level(KMP_HW_CORE);
9155 int num_cores = __kmp_topology->get_count(core_level);
9156 int upper_levels = 1;
9157   for (int level = 0; level < loc - 1; ++level)
9158 upper_levels *= __kmp_nesting_nth_level[level];
9159 if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
9160 __kmp_nesting_nth_level[loc - 1] =
9161 num_cores / __kmp_nesting_nth_level[loc - 2];
9163 __kmp_nesting_mode_nlevels = loc;
9164 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9166 if (__kmp_avail_proc >= 4) {
9167 __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
9168 __kmp_nesting_nth_level[1] = 2;
9169 __kmp_nesting_mode_nlevels = 2;
9171 __kmp_nesting_nth_level[0] = __kmp_avail_proc;
9172 __kmp_nesting_mode_nlevels = 1;
9174 __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
9176 for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
9177 __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
9179 set__nproc(thread, __kmp_nesting_nth_level[0]);
9180 if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
9181 __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
9182 if (get__max_active_levels(thread) > 1) {
9184 __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
9186 if (__kmp_nesting_mode == 1)
9187 set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
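// Illustrative only. When the nesting mode sizes the per-level nthreads table
// from the topology, nested regions like the hedged sketch below pick up those
// per-level defaults (assuming max-active-levels permits the nesting).
#if 0
#include <omp.h>
#include <stdio.h>
void nesting_sketch(void) {
  omp_set_max_active_levels(2);
#pragma omp parallel // level 1: outer per-level default
  {
#pragma omp parallel // level 2: inner per-level default
    {
#pragma omp single
      printf("level %d team of %d\n", omp_get_level(), omp_get_num_threads());
    }
  }
}
#endif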