#include "ompt-specific.h"

#define MAX_MESSAGE 512
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // On Windows, shut the library down here so OMPT events are reported before
  // process exit tears down the worker threads.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  semi2 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }

  return __kmp_entry_thread()->th.th_root->r.r_active;
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
#endif

  va_start(ap, microtask);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) microtask,
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                  kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
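/* Illustration (not part of the runtime): a compiler typically lowers
   "#pragma omp parallel" into a call of __kmpc_fork_call with an outlined
   body. Roughly, assuming one shared variable x:

     void body(kmp_int32 *gtid, kmp_int32 *btid, int *x) { (*x)++; }
     ...
     __kmpc_fork_call(&loc, 1, (kmpc_micro)body, &x);

   The exact outlining and argument passing are compiler-specific; "body" and
   "x" here are hypothetical names. */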
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];

  va_start(ap, microtask);

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
  }

  // remember the teams entry point and nesting level
  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level;

#if OMPT_SUPPORT
  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

  // if __kmpc_push_num_teams was not called, set the default number of teams
  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  }
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master,
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                  ,
                  fork_context_intel
#endif
                  );

  // Pop the current CG root off the list
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;

  // Restore the current task's thread_limit from the CG root
  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
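/* Illustration (not part of the runtime): "#pragma omp teams num_teams(4)"
   is typically lowered to a push followed by the teams fork, roughly:

     __kmpc_push_num_teams(&loc, gtid, 4, 0);
     __kmpc_fork_teams(&loc, 0, (kmpc_micro)teams_body);

   where teams_body is the hypothetical outlined function for the construct. */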
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

  __kmp_assert_valid_gtid(global_tid);
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  __kmp_serialized_parallel(loc, global_tid);
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  // wait for proxy and hidden-helper tasks before tearing the region down
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered))
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    }

    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));
    }
    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
  }

  /* if necessary, pop the internal control stack values and restore the ICVs */
  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;
    __kmp_free(top);
  }

  /* pop the dispatch buffer stack */
  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
  dispatch_private_info_t *disp_buffer =
      serial_team->t.t_dispatch->th_disp_buffer;
  serial_team->t.t_dispatch->th_disp_buffer =
      serial_team->t.t_dispatch->th_disp_buffer->next;
  __kmp_free(disp_buffer);
  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    // restore the floating-point control state saved when the region started
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
    }
#endif

    __kmp_pop_current_task_from_thread(this_thr);
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    // restore values cached in the thread from the parent team
    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0];
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      // restore the thread's task team from the parent team
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    }
#if KMP_AFFINITY_SUPPORTED
    if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
      __kmp_reset_root_init_mask(global_tid);
    }
#endif
  } else {
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));
    }
  }

  serial_team->t.t_level--;
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);

  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
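/* Illustration (not part of the runtime): a parallel region that ends up
   serialized, e.g. "#pragma omp parallel if(0)", is bracketed by the
   serialized entry points instead of a real fork/join, roughly:

     __kmpc_serialized_parallel(&loc, gtid);
     outlined(&gtid, &zero, &x);   // hypothetical outlined body, run inline
     __kmpc_end_serialized_parallel(&loc, gtid);

   The function above unwinds that serialized team. */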
  KC_TRACE(10, ("__kmpc_flush: called\n"));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
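/* Illustration (not part of the runtime): a stand-alone "#pragma omp barrier"
   is typically lowered to

     __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));

   i.e. a plain barrier on the current team. */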
  int status = 0;

  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);
    status = 1;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (status) {
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
#endif
  }
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
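/* Illustration (not part of the runtime): "#pragma omp master" lowers to a
   guarded call pair, roughly:

     if (__kmpc_master(&loc, gtid)) {
       ...                          // master-only code
       __kmpc_end_master(&loc, gtid);
     }
*/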
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);

  KMP_PUSH_PARTITIONED_TIMER(OMP_masked);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_masked) {
    kmp_info_t *this_thr = __kmp_threads[global_tid];
    kmp_team_t *team = this_thr->th.th_team;
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
#else
    if (status)
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
    else
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
#endif
  }
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
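/* Illustration (not part of the runtime): the OpenMP 5.1 masked construct,
   e.g. "#pragma omp masked filter(tid)", lowers roughly to:

     if (__kmpc_masked(&loc, gtid, tid)) {
       ...                          // code run only by the filtered thread
       __kmpc_end_masked(&loc, gtid);
     }
*/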
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  __kmp_itt_ordered_prep(gtid);

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
          codeptr_ra);
    }
  }
#endif

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

  __kmp_itt_ordered_start(gtid);
  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  __kmp_itt_ordered_end(gtid);

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
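/* Illustration (not part of the runtime): an ordered block inside an ordered
   loop, e.g.

     #pragma omp for ordered schedule(dynamic)
     for (int i = 0; i < n; ++i) {
       #pragma omp ordered
       { emit(i); }              // emit() is a hypothetical user function
     }

   lowers the block to __kmpc_ordered(&loc, gtid) ... __kmpc_end_ordered(&loc,
   gtid), which serializes iterations via the team's t_ordered counter. */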
#if KMP_USE_DYNAMIC_LOCK

// internal critical-section initializer for indirect locks
static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
  __kmp_itt_critical_creating(ilk->lock, loc);
  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
  if (status == 0) {
    __kmp_itt_critical_destroyed(ilk->lock);
  }
  KMP_DEBUG_ASSERT(*lck != NULL);
// Fast-path acquire tas lock
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid)                                      \
  {                                                                           \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                               \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                  \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                        \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                         \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) {   \
      kmp_uint32 spins;                                                       \
      KMP_FSYNC_PREPARE(l);                                                   \
      KMP_INIT_YIELD(spins);                                                  \
      kmp_backoff_t backoff = __kmp_spin_backoff_params;                      \
      do {                                                                    \
        if (TCR_4(__kmp_nth) >                                                \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {            \
          KMP_YIELD(TRUE);                                                    \
        } else {                                                              \
          KMP_YIELD_SPIN(spins);                                              \
        }                                                                     \
        __kmp_spin_backoff(&backoff);                                         \
      } while (                                                               \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free ||                       \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy));  \
    }                                                                         \
    KMP_FSYNC_ACQUIRED(l);                                                    \
  }

// Fast-path test tas lock
#define KMP_TEST_TAS_LOCK(lock, gtid, rc)                                     \
  {                                                                           \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock;                               \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas);                                  \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);                        \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free &&                        \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy);     \
  }

// Fast-path release tas lock
#define KMP_RELEASE_TAS_LOCK(lock, gtid)                                      \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
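/* The macros above inline the test-and-set fast path: a relaxed load to see
   the lock free, then a compare-and-swap with acquire semantics; on failure
   they spin with yield/backoff until the CAS succeeds, and release is a plain
   store with release semantics. A minimal stand-alone sketch of the same
   protocol in C11 atomics (illustrative only, not the runtime's types):

     _Atomic int poll = 0;                     // 0 = free, gtid + 1 = owner
     void acquire(int gtid) {
       int expected = 0;
       while (atomic_load_explicit(&poll, memory_order_relaxed) != 0 ||
              !atomic_compare_exchange_strong_explicit(
                  &poll, &expected, gtid + 1, memory_order_acquire,
                  memory_order_relaxed)) {
         expected = 0;                         // reset and retry (yield/backoff)
       }
     }
     void release(void) { atomic_store_explicit(&poll, 0, memory_order_release); }
*/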
#include <sys/syscall.h>

// Fast-path acquire futex lock
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid)                                    \
  {                                                                           \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                         \
    kmp_int32 gtid_code = (gtid + 1) << 1;                                    \
    KMP_FSYNC_PREPARE(ftx);                                                   \
    kmp_int32 poll_val;                                                       \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32(                           \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex),                        \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {  \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;                          \
      if (!cond) {                                                            \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val,           \
                                         poll_val |                           \
                                             KMP_LOCK_BUSY(1, futex))) {      \
          continue;                                                           \
        }                                                                     \
        poll_val |= KMP_LOCK_BUSY(1, futex);                                  \
      }                                                                       \
      kmp_int32 rc;                                                           \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val,    \
                        NULL, NULL, 0)) != 0) {                               \
        continue;                                                             \
      }                                                                       \
    }                                                                         \
    KMP_FSYNC_ACQUIRED(ftx);                                                  \
  }

// Fast-path test futex lock
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc)                                   \
  {                                                                           \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                         \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex),    \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx);                                                \
      rc = TRUE;                                                              \
    } else {                                                                  \
      rc = FALSE;                                                             \
    }                                                                         \
  }

// Fast-path release futex lock
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid)                                    \
  {                                                                           \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock;                         \
    KMP_FSYNC_RELEASING(ftx);                                                 \
    kmp_int32 poll_val =                                                      \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex));              \
    if (KMP_LOCK_STRIP(poll_val) & 1) {                                       \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE,                        \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0);                        \
    }                                                                         \
    KMP_YIELD_OVERSUB();                                                      \
  }
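/* In the futex variant the poll word holds a busy code derived from
   (gtid + 1) << 1 and reserves the low bit, exposed via KMP_LOCK_STRIP, as a
   "waiters present" flag: a contending thread sets that bit and blocks in
   FUTEX_WAIT on the poll word, while the releasing thread exchanges the word
   back to free and issues FUTEX_WAKE only when the flag was set. An
   uncontended acquire/release therefore never enters the kernel. */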
static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

  if (lck == NULL) {
    // Allocate and initialize the lock, then try to install it with a CAS.
    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);
    __kmp_itt_critical_creating(lck);

    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
    if (status == 0) {
      // Another thread installed its lock first; free ours and use theirs.
      __kmp_itt_critical_destroyed(lck);
      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
#endif
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;
    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
    }
  }
#endif
  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
#if KMP_USE_DYNAMIC_LOCK

// Converts the given hint to an internal lock implementation
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#if KMP_USE_TSX
#define KMP_TSX_LOCK(seq) lockseq_##seq
#else
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
#else
#define KMP_CPUINFO_RTM 0
#endif

  // Hints that do not require further logic
  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  // Rule out conflicting hints by falling back to the default lock
  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  // Do not even consider speculation when it appears to be contended
  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  // Uncontended lock without speculation
  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
    return lockseq_tas;

  // Use an RTM lock for speculation
  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
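/* The mapping above backs both hinted lock initialization and hinted critical
   sections. Illustration (not part of the runtime): user code such as

     omp_lock_t l;
     omp_init_lock_with_hint(&l, omp_lock_hint_speculative);

   ends up selecting a speculative (RTM) lock sequence when the CPU supports
   it and otherwise falls back to the default user lock sequence. */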
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK

static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  if (user_lock) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
    case 0:
      break;
    case locktag_futex:
      return kmp_mutex_impl_queuing;
    case locktag_tas:
      return kmp_mutex_impl_spin;
    case locktag_hle:
    case locktag_rtm_spin:
      return kmp_mutex_impl_speculative;
    default:
      return kmp_mutex_impl_none;
    }
    ilock = KMP_LOOKUP_I_LOCK(user_lock);
  }
  switch (ilock->type) {
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
  case locktag_nested_futex:
  case locktag_ticket:
  case locktag_queuing:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
  default:
    return kmp_mutex_impl_none;
  }
}

// For locks without dynamic binding
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
  case lk_tas:
    return kmp_mutex_impl_spin;
  case lk_ticket:
  case lk_queuing:
  case lk_drdpa:
    return kmp_mutex_impl_queuing;
  case lk_rtm_queuing:
  case lk_rtm_spin:
  case lk_adaptive:
    return kmp_mutex_impl_speculative;
  default:
    return kmp_mutex_impl_none;
  }
}
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;
  // This is the case, if called from __kmpc_critical:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (codeptr == NULL)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
#endif

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if the lock is already initialized; if not, install one.
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(lockseq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(lockseq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
    }
  }
  // Branch on direct vs. indirect lock to reach the actual lock object.
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
    __kmp_itt_critical_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));
    }
    __kmp_itt_critical_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;
      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
            codeptr);
      }
    }
#endif
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }
  KMP_POP_PARTITIONED_TIMER();

  __kmp_itt_critical_acquired(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif
  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);
  if (locktag) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
    __kmp_itt_critical_releasing(lck);
#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
    } else
#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
    } else
#endif
    {
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
    }
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);
    lck = ilk->lock;
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);
    }
    __kmp_itt_critical_releasing(lck);
    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
  }

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_itt_critical_releasing(lck);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
    __kmp_check_barrier(global_tid, ct_barrier, loc);
  }

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_master, loc);
  __kmp_assert_valid_gtid(global_tid);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

  if (rc) {
    // We are executing the single construct on this thread
    KMP_PUSH_PARTITIONED_TIMER(OMP_single);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
    if (rc) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    } else {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
      }
    }
  }
#endif
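/* Illustration (not part of the runtime): "#pragma omp single" (without
   nowait) lowers roughly to:

     if (__kmpc_single(&loc, gtid)) {
       ...                         // executed by the one winning thread
       __kmpc_end_single(&loc, gtid);
     }
     __kmpc_barrier(&loc, gtid);   // implicit barrier unless nowait is given
*/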
  __kmp_assert_valid_gtid(global_tid);
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
}

void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    // Determine the workshare type from the ident flags; default is loop.
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
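/* Illustration (not part of the runtime): a statically scheduled worksharing
   loop is bracketed by an init/fini pair, roughly:

     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub,
                              &stride, 1, 1);
     for (int i = lb; i <= ub; ++i) body(i);   // body() is hypothetical
     __kmpc_for_static_fini(&loc, gtid);

   so the fini call above is where the OMPT work-end event for the construct
   is raised. */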
void ompc_set_num_threads(int arg) {
  __kmp_set_num_threads(arg, __kmp_entry_gtid());
}

void ompc_set_dynamic(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? true : false);
}

void ompc_set_nested(int flag) {
  kmp_info_t *thread;

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
}

void ompc_set_max_active_levels(int max_active_levels) {
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
}

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
}

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
}

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);
}

size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
  size_t format_size;
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
  }
  return format_size;
}

void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
  int gtid;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif
  __kmp_aux_display_affinity(gtid, format);
}

size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
                                              char const *format) {
  int gtid;
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
    __kmp_reset_root_init_mask(gtid);
  }
#endif
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);
  }
  __kmp_str_buf_free(&capture_buf);
  return num_required;
}
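/* Illustration (not part of the runtime): these entry points back the OpenMP
   5.0 affinity-format API, e.g.

     omp_set_affinity_format("thread %n on host %H");
     char buf[128];
     size_t need = omp_capture_affinity(buf, sizeof(buf), NULL);

   Passing a NULL or empty format falls back to the format stored above. */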
void kmpc_set_stacksize(int arg) {
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_stacksize_s(size_t arg) {
  __kmp_aux_set_stacksize(arg);
}

void kmpc_set_blocktime(int arg) {
  int gtid, tid;
  kmp_info_t *thread;

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);
}

void kmpc_set_library(int arg) {
  __kmp_user_set_library((enum library_type)arg);
}

void kmpc_set_defaults(char const *str) {
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
}

void kmpc_set_disp_num_buffers(int arg) {
  // ignored after initialization because some teams have already allocated
  // dispatch buffers
  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
      arg <= KMP_MAX_DISP_NUM_BUFF) {
    __kmp_dispatch_num_buffers = arg;
  }
}

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  return -1;
#else
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();
  }
  __kmp_assign_root_init_mask();
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
                        void *cpy_data, void (*cpy_func)(void *, void *),
                        kmp_int32 didit) {
  void **data_ptr;
  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // the single thread publishes its private data
  if (didit)
    *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  // every other thread copies the published data
  if (!didit)
    (*cpy_func)(cpy_data, *data_ptr);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif
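/* Illustration (not part of the runtime): "#pragma omp single copyprivate(x)"
   makes the winning thread publish its x and every other thread copy it,
   roughly:

     static void copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }
     ...
     kmp_int32 didit = __kmpc_single(&loc, gtid);
     if (didit) {
       x = compute();                       // compute() is hypothetical
       __kmpc_end_single(&loc, gtid);
     }
     __kmpc_copyprivate(&loc, gtid, sizeof(int), &x, copy_x, didit);

   The two barriers inside __kmpc_copyprivate make the broadcast safe without
   an extra construct-level barrier. */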
  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid);
    }
  }

  // the single thread publishes its private data
  *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);
  }

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED                                             \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
#if KMP_USE_DYNAMIC_LOCK

// internal lock initializer
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
  } else {
    KMP_INIT_I_LOCK(lock, seq);
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);
  }
}
// internal nest lock initializer
static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
  // there is no nested implementation for the speculative lock kinds
  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
      seq == lockseq_rtm_spin || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
  switch (seq) {
  case lockseq_tas:
    seq = lockseq_nested_tas;
    break;
  case lockseq_futex:
    seq = lockseq_nested_futex;
    break;
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
    break;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    break;
  case lockseq_drdpa:
    seq = lockseq_nested_drdpa;
    break;
  default:
    seq = lockseq_nested_queuing;
  }
  KMP_INIT_I_LOCK(lock, seq);
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
  }

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
  }

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");
  }
  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }
  INIT_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_creating(lck);

#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
  }
  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
  }

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_creating(lck);

#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
  } else {
    lck = (kmp_user_lock_p)user_lock;
  }
  __kmp_itt_lock_destroyed(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_destroyed(lck);

  DESTROY_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    ;
  } else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

  __kmp_itt_lock_destroyed(lck);

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    ;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    ;
  } else {
    __kmp_user_lock_free(user_lock, gtid, lck);
  }
#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  ACQUIRE_LOCK(lck, gtid);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
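/* Illustration (not part of the runtime): these entry points mirror the user
   lock API, e.g.

     omp_lock_t l;
     omp_init_lock(&l);
     omp_set_lock(&l);
     ...                   // protected region
     omp_unset_lock(&l);
     omp_destroy_lock(&l);

   With dynamic locks enabled, the lock word itself encodes either a direct
   lock tag (TAS/futex fast paths above) or a pointer to an indirect lock. */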
2771void __kmpc_set_nest_lock(
ident_t *loc, kmp_int32 gtid,
void **user_lock) {
2772#if KMP_USE_DYNAMIC_LOCK
2775 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2777#if OMPT_SUPPORT && OMPT_OPTIONAL
2779 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2781 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2782 if (ompt_enabled.enabled) {
2783 if (ompt_enabled.ompt_callback_mutex_acquire) {
2784 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2785 ompt_mutex_nest_lock, omp_lock_hint_none,
2786 __ompt_get_mutex_impl_type(user_lock),
2787 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2791 int acquire_status =
2792 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2793 (void)acquire_status;
2795 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2798#if OMPT_SUPPORT && OMPT_OPTIONAL
2799 if (ompt_enabled.enabled) {
2800 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2801 if (ompt_enabled.ompt_callback_mutex_acquired) {
2803 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2804 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2808 if (ompt_enabled.ompt_callback_nest_lock) {
2810 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2811 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2819 kmp_user_lock_p lck;
2821 if ((__kmp_user_lock_kind == lk_tas) &&
2822 (
sizeof(lck->tas.lk.poll) +
sizeof(lck->tas.lk.depth_locked) <=
2823 OMP_NEST_LOCK_T_SIZE)) {
2824 lck = (kmp_user_lock_p)user_lock;
2827 else if ((__kmp_user_lock_kind == lk_futex) &&
2828 (
sizeof(lck->futex.lk.poll) +
sizeof(lck->futex.lk.depth_locked) <=
2829 OMP_NEST_LOCK_T_SIZE)) {
2830 lck = (kmp_user_lock_p)user_lock;
2834 lck = __kmp_lookup_user_lock(user_lock,
"omp_set_nest_lock");
2838 __kmp_itt_lock_acquiring(lck);
2840#if OMPT_SUPPORT && OMPT_OPTIONAL
2842 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2844 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2845 if (ompt_enabled.enabled) {
2846 if (ompt_enabled.ompt_callback_mutex_acquire) {
2847 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2848 ompt_mutex_nest_lock, omp_lock_hint_none,
2849 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2855 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2858 __kmp_itt_lock_acquired(lck);
2861#if OMPT_SUPPORT && OMPT_OPTIONAL
2862 if (ompt_enabled.enabled) {
2863 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2864 if (ompt_enabled.ompt_callback_mutex_acquired) {
2866 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2867 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2870 if (ompt_enabled.ompt_callback_nest_lock) {
2872 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2873 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
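/*
 * Note on the OMPT logic above: for a nestable lock the mutex_acquired event
 * is dispatched only when the lock is obtained for the first time
 * (acquire_status == KMP_LOCK_ACQUIRED_FIRST); a re-acquisition by the owning
 * thread instead dispatches ompt_callback_nest_lock with ompt_scope_begin.
 */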
2882 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2883#if KMP_USE_DYNAMIC_LOCK
2885 int tag = KMP_EXTRACT_D_TAG(user_lock);
2887 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2889#if KMP_USE_INLINED_TAS
2890 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2891 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2893#elif KMP_USE_INLINED_FUTEX
2894 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2895 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2899 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2902#if OMPT_SUPPORT && OMPT_OPTIONAL
2904 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2906 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2907 if (ompt_enabled.ompt_callback_mutex_released) {
2908 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2909 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2915 kmp_user_lock_p lck;
2920 if ((__kmp_user_lock_kind == lk_tas) &&
2921       (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2922#if KMP_OS_LINUX && \
2923 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2926 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2928 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2931#if OMPT_SUPPORT && OMPT_OPTIONAL
2933 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2935 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2936 if (ompt_enabled.ompt_callback_mutex_released) {
2937 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2938 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2944 lck = (kmp_user_lock_p)user_lock;
2948 else if ((__kmp_user_lock_kind == lk_futex) &&
2949            (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2950 lck = (kmp_user_lock_p)user_lock;
2954     lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2958 __kmp_itt_lock_releasing(lck);
2961 RELEASE_LOCK(lck, gtid);
2963#if OMPT_SUPPORT && OMPT_OPTIONAL
2965 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2967 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2968 if (ompt_enabled.ompt_callback_mutex_released) {
2969 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2970 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2978 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2979#if KMP_USE_DYNAMIC_LOCK
2982 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2984 int release_status =
2985 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2986 (void)release_status;
2988#if OMPT_SUPPORT && OMPT_OPTIONAL
2990 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2992 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2993 if (ompt_enabled.enabled) {
2994 if (release_status == KMP_LOCK_RELEASED) {
2995 if (ompt_enabled.ompt_callback_mutex_released) {
2997 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2998 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3001       } else if (ompt_enabled.ompt_callback_nest_lock) {
3003 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3004 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3011 kmp_user_lock_p lck;
3015 if ((__kmp_user_lock_kind == lk_tas) &&
3016       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3017        OMP_NEST_LOCK_T_SIZE)) {
3018#if KMP_OS_LINUX && \
3019 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3021 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3023 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3026#if OMPT_SUPPORT && OMPT_OPTIONAL
3027 int release_status = KMP_LOCK_STILL_HELD;
3030 if (--(tl->lk.depth_locked) == 0) {
3031 TCW_4(tl->lk.poll, 0);
3032#if OMPT_SUPPORT && OMPT_OPTIONAL
3033 release_status = KMP_LOCK_RELEASED;
3038#if OMPT_SUPPORT && OMPT_OPTIONAL
3040 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3042 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3043 if (ompt_enabled.enabled) {
3044 if (release_status == KMP_LOCK_RELEASED) {
3045 if (ompt_enabled.ompt_callback_mutex_released) {
3047 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3048 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3050       } else if (ompt_enabled.ompt_callback_nest_lock) {
3052 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3053 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3060 lck = (kmp_user_lock_p)user_lock;
3064 else if ((__kmp_user_lock_kind == lk_futex) &&
3065            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3066             OMP_NEST_LOCK_T_SIZE)) {
3067 lck = (kmp_user_lock_p)user_lock;
3071     lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3075 __kmp_itt_lock_releasing(lck);
3079 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3080#if OMPT_SUPPORT && OMPT_OPTIONAL
3082 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3084 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3085 if (ompt_enabled.enabled) {
3086 if (release_status == KMP_LOCK_RELEASED) {
3087 if (ompt_enabled.ompt_callback_mutex_released) {
3089 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3090 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3092       } else if (ompt_enabled.ompt_callback_nest_lock) {
3094 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3095 ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3104 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3107#if KMP_USE_DYNAMIC_LOCK
3109 int tag = KMP_EXTRACT_D_TAG(user_lock);
3111 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3113#if OMPT_SUPPORT && OMPT_OPTIONAL
3115 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3117 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3118 if (ompt_enabled.ompt_callback_mutex_acquire) {
3119 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3120 ompt_mutex_lock, omp_lock_hint_none,
3121 __ompt_get_mutex_impl_type(user_lock),
3122 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3125#if KMP_USE_INLINED_TAS
3126 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3127 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3129#elif KMP_USE_INLINED_FUTEX
3130 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3131 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3135 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3139 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3141#if OMPT_SUPPORT && OMPT_OPTIONAL
3142 if (ompt_enabled.ompt_callback_mutex_acquired) {
3143 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3144 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3150 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3157 kmp_user_lock_p lck;
3160 if ((__kmp_user_lock_kind == lk_tas) &&
3161       (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3162 lck = (kmp_user_lock_p)user_lock;
3165 else if ((__kmp_user_lock_kind == lk_futex) &&
3166            (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3167 lck = (kmp_user_lock_p)user_lock;
3171     lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3175 __kmp_itt_lock_acquiring(lck);
3177#if OMPT_SUPPORT && OMPT_OPTIONAL
3179 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3181 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3182 if (ompt_enabled.ompt_callback_mutex_acquire) {
3183 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3184 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3185 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3189 rc = TEST_LOCK(lck, gtid);
3192 __kmp_itt_lock_acquired(lck);
3194 __kmp_itt_lock_cancelled(lck);
3197#if OMPT_SUPPORT && OMPT_OPTIONAL
3198 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3199 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3200 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3204 return (rc ? FTN_TRUE : FTN_FALSE);
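/*
 * Summary of __kmpc_test_lock above: the OMPT mutex_acquire callback fires
 * before the attempt, mutex_acquired fires only if the try-lock succeeded
 * (rc != 0), and on failure the ittnotify acquire is cancelled. The result is
 * returned as FTN_TRUE/FTN_FALSE for the Fortran-compatible ABI.
 */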
3212 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3213#if KMP_USE_DYNAMIC_LOCK
3216 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3218#if OMPT_SUPPORT && OMPT_OPTIONAL
3220 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3222 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3223 if (ompt_enabled.ompt_callback_mutex_acquire) {
3224 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3225 ompt_mutex_nest_lock, omp_lock_hint_none,
3226 __ompt_get_mutex_impl_type(user_lock),
3227 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3230 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3233 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3235 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3238#if OMPT_SUPPORT && OMPT_OPTIONAL
3239 if (ompt_enabled.enabled && rc) {
3241 if (ompt_enabled.ompt_callback_mutex_acquired) {
3243 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3244 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3248 if (ompt_enabled.ompt_callback_nest_lock) {
3250 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3251 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3260 kmp_user_lock_p lck;
3263 if ((__kmp_user_lock_kind == lk_tas) &&
3264       (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3265        OMP_NEST_LOCK_T_SIZE)) {
3266 lck = (kmp_user_lock_p)user_lock;
3269 else if ((__kmp_user_lock_kind == lk_futex) &&
3270            (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3271             OMP_NEST_LOCK_T_SIZE)) {
3272 lck = (kmp_user_lock_p)user_lock;
3276     lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3280 __kmp_itt_lock_acquiring(lck);
3283#if OMPT_SUPPORT && OMPT_OPTIONAL
3285 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3287 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3288   if (ompt_enabled.enabled &&
3289       ompt_enabled.ompt_callback_mutex_acquire) {
3290 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3291 ompt_mutex_nest_lock, omp_lock_hint_none,
3292 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3297 rc = TEST_NESTED_LOCK(lck, gtid);
3300 __kmp_itt_lock_acquired(lck);
3302 __kmp_itt_lock_cancelled(lck);
3305#if OMPT_SUPPORT && OMPT_OPTIONAL
3306 if (ompt_enabled.enabled && rc) {
3308 if (ompt_enabled.ompt_callback_mutex_acquired) {
3310 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3311 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3314 if (ompt_enabled.ompt_callback_nest_lock) {
3316 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3317 ompt_mutex_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3336#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3337 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3339#define __KMP_GET_REDUCTION_METHOD(gtid) \
3340 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
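/*
 * The reduction method packed by __kmp_determine_reduction_method() at
 * __kmpc_reduce{_nowait}() entry is cached per thread through the macros above
 * so that the matching __kmpc_end_reduce{_nowait}() call can take the same
 * path (critical section, atomic, tree barrier, or empty block).
 */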
3346 static __forceinline void
3347 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3348                                           kmp_critical_name *crit) {
3354 kmp_user_lock_p lck;
3356#if KMP_USE_DYNAMIC_LOCK
3358 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3361 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3362     KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3363                                 KMP_GET_D_TAG(__kmp_user_lock_seq));
3365 __kmp_init_indirect_csptr(crit, loc, global_tid,
3366 KMP_GET_I_TAG(__kmp_user_lock_seq));
3372 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3373 lck = (kmp_user_lock_p)lk;
3374 KMP_DEBUG_ASSERT(lck != NULL);
3375 if (__kmp_env_consistency_check) {
3376 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3378 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3380 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3382 KMP_DEBUG_ASSERT(lck != NULL);
3383 if (__kmp_env_consistency_check) {
3384 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3386 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3394 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3395 lck = (kmp_user_lock_p)crit;
3397 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3399 KMP_DEBUG_ASSERT(lck != NULL);
3401 if (__kmp_env_consistency_check)
3402 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3404 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3410 static __forceinline void
3411 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3412                                         kmp_critical_name *crit) {
3414 kmp_user_lock_p lck;
3416#if KMP_USE_DYNAMIC_LOCK
3418 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3419 lck = (kmp_user_lock_p)crit;
3420 if (__kmp_env_consistency_check)
3421 __kmp_pop_sync(global_tid, ct_critical, loc);
3422 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3424 kmp_indirect_lock_t *ilk =
3425 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3426 if (__kmp_env_consistency_check)
3427 __kmp_pop_sync(global_tid, ct_critical, loc);
3428 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3436 if (__kmp_base_user_lock_size > 32) {
3437 lck = *((kmp_user_lock_p *)crit);
3438 KMP_ASSERT(lck != NULL);
3440 lck = (kmp_user_lock_p)crit;
3443 if (__kmp_env_consistency_check)
3444 __kmp_pop_sync(global_tid, ct_critical, loc);
3446 __kmp_release_user_lock_with_checks(lck, global_tid);
3451 static __forceinline int
3452 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                         int *task_state) {
  kmp_team_t *team;
3457 if (th->th.th_teams_microtask) {
3458 *team_p = team = th->th.th_team;
3459 if (team->t.t_level == th->th.th_teams_level) {
3461 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid);
3463 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3464 th->th.th_team = team->t.t_parent;
3465 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3466 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3467 *task_state = th->th.th_task_state;
3468 th->th.th_task_state = 0;
3476 static __forceinline void
3477 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team,
                                int task_state) {
3479 th->th.th_info.ds.ds_tid = 0;
3480 th->th.th_team = team;
3481 th->th.th_team_nproc = team->t.t_nproc;
3482 th->th.th_task_team = team->t.t_task_team[task_state];
3483 __kmp_type_convert(task_state, &(th->th.th_task_state));
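/*
 * The two helpers above temporarily swap a thread from its teams-construct
 * team to the parent team while a reduction is in progress (saving the task
 * state), and afterwards restore the original team, thread id, and task team.
 */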
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3504                                size_t reduce_size, void *reduce_data,
3505                                void (*reduce_func)(void *lhs_data, void *rhs_data),
3506                                kmp_critical_name *lck) {
3510 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3513 int teams_swapped = 0, task_state;
3514   KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3515 __kmp_assert_valid_gtid(global_tid);
3523 if (!TCR_4(__kmp_init_parallel))
3524 __kmp_parallel_initialize();
3526 __kmp_resume_if_soft_paused();
3529#if KMP_USE_DYNAMIC_LOCK
3530 if (__kmp_env_consistency_check)
3531 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3533 if (__kmp_env_consistency_check)
3534 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3537 th = __kmp_thread_from_gtid(global_tid);
3538 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3556 packed_reduction_method = __kmp_determine_reduction_method(
3557 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3558 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3560 OMPT_REDUCTION_DECL(th, global_tid);
3561 if (packed_reduction_method == critical_reduce_block) {
3563 OMPT_REDUCTION_BEGIN;
3565 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3568   } else if (packed_reduction_method == empty_reduce_block) {
3570     OMPT_REDUCTION_BEGIN;
3576   } else if (packed_reduction_method == atomic_reduce_block) {
3586     if (__kmp_env_consistency_check)
3587       __kmp_pop_sync(global_tid, ct_reduce, loc);
3589   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3590                                    tree_reduce_block)) {
3610 ompt_frame_t *ompt_frame;
3611 if (ompt_enabled.enabled) {
3612 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3613 if (ompt_frame->enter_frame.ptr == NULL)
3614 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3616 OMPT_STORE_RETURN_ADDRESS(global_tid);
3619 __kmp_threads[global_tid]->th.th_ident = loc;
3622 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3623 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3624 retval = (retval != 0) ? (0) : (1);
3625#if OMPT_SUPPORT && OMPT_OPTIONAL
3626 if (ompt_enabled.enabled) {
3627 ompt_frame->enter_frame = ompt_data_none;
3633 if (__kmp_env_consistency_check) {
3635 __kmp_pop_sync(global_tid, ct_reduce, loc);
3644 if (teams_swapped) {
3645 __kmp_restore_swapped_teams(th, team, task_state);
  KA_TRACE(10,
3649       ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3650        global_tid, packed_reduction_method, retval));
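/*
 * Illustrative only (rough compiler-generated pattern; variable names are
 * invented for the sketch): a reduction clause is typically lowered around
 * this pair of entry points roughly as
 *
 *   int ret = __kmpc_reduce_nowait(loc, gtid, 1, sizeof(priv), &priv,
 *                                  reduce_func, &crit);
 *   if (ret == 1) {            // combine the private copy, then close
 *     shared = shared + priv;
 *     __kmpc_end_reduce_nowait(loc, gtid, &crit);
 *   } else if (ret == 2) {     // atomic_reduce_block: combine atomically
 *     #pragma omp atomic
 *     shared += priv;
 *   }                          // ret == 0: nothing more to do on this thread
 */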
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3664                               kmp_critical_name *lck) {
3666 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3668   KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3669 __kmp_assert_valid_gtid(global_tid);
3671 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3673 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3675 if (packed_reduction_method == critical_reduce_block) {
3677 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3680   } else if (packed_reduction_method == empty_reduce_block) {
3687   } else if (packed_reduction_method == atomic_reduce_block) {
3694   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3695                                    tree_reduce_block)) {
3706 if (__kmp_env_consistency_check)
3707 __kmp_pop_sync(global_tid, ct_reduce, loc);
3709   KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3710                 global_tid, packed_reduction_method));
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3733                         size_t reduce_size, void *reduce_data,
3734                         void (*reduce_func)(void *lhs_data, void *rhs_data),
3735                         kmp_critical_name *lck) {
3738 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3741 int teams_swapped = 0, task_state;
3743   KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3744 __kmp_assert_valid_gtid(global_tid);
3752 if (!TCR_4(__kmp_init_parallel))
3753 __kmp_parallel_initialize();
3755 __kmp_resume_if_soft_paused();
3758#if KMP_USE_DYNAMIC_LOCK
3759 if (__kmp_env_consistency_check)
3760 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3762 if (__kmp_env_consistency_check)
3763 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3766 th = __kmp_thread_from_gtid(global_tid);
3767 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3769 packed_reduction_method = __kmp_determine_reduction_method(
3770 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3771 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3773 OMPT_REDUCTION_DECL(th, global_tid);
3775 if (packed_reduction_method == critical_reduce_block) {
3777 OMPT_REDUCTION_BEGIN;
3778 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3781   } else if (packed_reduction_method == empty_reduce_block) {
3783     OMPT_REDUCTION_BEGIN;
3788   } else if (packed_reduction_method == atomic_reduce_block) {
3792   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3793                                    tree_reduce_block)) {
3799 ompt_frame_t *ompt_frame;
3800 if (ompt_enabled.enabled) {
3801 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3802 if (ompt_frame->enter_frame.ptr == NULL)
3803 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3805 OMPT_STORE_RETURN_ADDRESS(global_tid);
3808     __kmp_threads[global_tid]->th.th_ident = loc;
3812 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3813 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3814 retval = (retval != 0) ? (0) : (1);
3815#if OMPT_SUPPORT && OMPT_OPTIONAL
3816 if (ompt_enabled.enabled) {
3817 ompt_frame->enter_frame = ompt_data_none;
3823 if (__kmp_env_consistency_check) {
3825 __kmp_pop_sync(global_tid, ct_reduce, loc);
3834 if (teams_swapped) {
3835 __kmp_restore_swapped_teams(th, team, task_state);
  KA_TRACE(10,
3839       ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3840        global_tid, packed_reduction_method, retval));
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3855                        kmp_critical_name *lck) {
3857 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3860 int teams_swapped = 0, task_state;
3862   KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3863 __kmp_assert_valid_gtid(global_tid);
3865 th = __kmp_thread_from_gtid(global_tid);
3866 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3868 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3872 OMPT_REDUCTION_DECL(th, global_tid);
3874 if (packed_reduction_method == critical_reduce_block) {
3875 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3881 ompt_frame_t *ompt_frame;
3882 if (ompt_enabled.enabled) {
3883 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3884 if (ompt_frame->enter_frame.ptr == NULL)
3885 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3887 OMPT_STORE_RETURN_ADDRESS(global_tid);
3890 __kmp_threads[global_tid]->th.th_ident = loc;
3892 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3893#if OMPT_SUPPORT && OMPT_OPTIONAL
3894 if (ompt_enabled.enabled) {
3895 ompt_frame->enter_frame = ompt_data_none;
3899   } else if (packed_reduction_method == empty_reduce_block) {
3907 ompt_frame_t *ompt_frame;
3908 if (ompt_enabled.enabled) {
3909 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3910 if (ompt_frame->enter_frame.ptr == NULL)
3911 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3913 OMPT_STORE_RETURN_ADDRESS(global_tid);
3916 __kmp_threads[global_tid]->th.th_ident = loc;
3918 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3919#if OMPT_SUPPORT && OMPT_OPTIONAL
3920 if (ompt_enabled.enabled) {
3921 ompt_frame->enter_frame = ompt_data_none;
3925   } else if (packed_reduction_method == atomic_reduce_block) {
3928 ompt_frame_t *ompt_frame;
3929 if (ompt_enabled.enabled) {
3930 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3931 if (ompt_frame->enter_frame.ptr == NULL)
3932 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3934 OMPT_STORE_RETURN_ADDRESS(global_tid);
3938 __kmp_threads[global_tid]->th.th_ident = loc;
3940 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3941#if OMPT_SUPPORT && OMPT_OPTIONAL
3942 if (ompt_enabled.enabled) {
3943 ompt_frame->enter_frame = ompt_data_none;
3947   } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3948                                    tree_reduce_block)) {
3951     __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                                 global_tid);
3959 if (teams_swapped) {
3960 __kmp_restore_swapped_teams(th, team, task_state);
3963 if (__kmp_env_consistency_check)
3964 __kmp_pop_sync(global_tid, ct_reduce, loc);
3966   KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3967                 global_tid, packed_reduction_method));
3972#undef __KMP_GET_REDUCTION_METHOD
3973#undef __KMP_SET_REDUCTION_METHOD
3977kmp_uint64 __kmpc_get_taskid() {
3982 gtid = __kmp_get_gtid();
3986 thread = __kmp_thread_from_gtid(gtid);
3987 return thread->th.th_current_task->td_task_id;
3991kmp_uint64 __kmpc_get_parent_taskid() {
3995 kmp_taskdata_t *parent_task;
3997 gtid = __kmp_get_gtid();
4001 thread = __kmp_thread_from_gtid(gtid);
4002 parent_task = thread->th.th_current_task->td_parent;
4003 return (parent_task == NULL ? 0 : parent_task->td_task_id);
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4019                           const struct kmp_dim *dims) {
4020 __kmp_assert_valid_gtid(gtid);
4022 kmp_int64 last, trace_count;
4023 kmp_info_t *th = __kmp_threads[gtid];
4024 kmp_team_t *team = th->th.th_team;
4026 kmp_disp_t *pr_buf = th->th.th_dispatch;
4027 dispatch_shared_info_t *sh_buf;
  KA_TRACE(20,
4031       ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4032        gtid, num_dims, !team->t.t_serialized));
4033 KMP_DEBUG_ASSERT(dims != NULL);
4034 KMP_DEBUG_ASSERT(num_dims > 0);
4036 if (team->t.t_serialized) {
4037     KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4040 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4041 idx = pr_buf->th_doacross_buf_idx++;
4043 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4046 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4047 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4048       th, sizeof(kmp_int64) * (4 * num_dims + 1));
4049 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4050 pr_buf->th_doacross_info[0] =
4051 (kmp_int64)num_dims;
4054 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4055 pr_buf->th_doacross_info[2] = dims[0].lo;
4056 pr_buf->th_doacross_info[3] = dims[0].up;
4057 pr_buf->th_doacross_info[4] = dims[0].st;
4059 for (j = 1; j < num_dims; ++j) {
4062 if (dims[j].st == 1) {
4064 range_length = dims[j].up - dims[j].lo + 1;
4066 if (dims[j].st > 0) {
4067 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4068 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4070 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4072 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4075 pr_buf->th_doacross_info[last++] = range_length;
4076 pr_buf->th_doacross_info[last++] = dims[j].lo;
4077 pr_buf->th_doacross_info[last++] = dims[j].up;
4078 pr_buf->th_doacross_info[last++] = dims[j].st;
4083 if (dims[0].st == 1) {
4084 trace_count = dims[0].up - dims[0].lo + 1;
4085   } else if (dims[0].st > 0) {
4086 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4087 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4089 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4090 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4092 for (j = 1; j < num_dims; ++j) {
4093 trace_count *= pr_buf->th_doacross_info[4 * j + 1];
4095 KMP_DEBUG_ASSERT(trace_count > 0);
4099 if (idx != sh_buf->doacross_buf_idx) {
4101     __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4108 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4109         (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4111 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4112         (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4114 if (flags == NULL) {
4117     size_t size = (size_t)trace_count / 8 + 8; // in bytes, one bit per iteration
4118 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4120 sh_buf->doacross_flags = flags;
4121   } else if (flags == (kmp_uint32 *)1) {
4124     while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4126     while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4133 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1);
4134 pr_buf->th_doacross_flags =
4135 sh_buf->doacross_flags;
4137   KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4140 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4141 __kmp_assert_valid_gtid(gtid);
4145 kmp_int64 iter_number;
4146 kmp_info_t *th = __kmp_threads[gtid];
4147 kmp_team_t *team = th->th.th_team;
4149 kmp_int64 lo, up, st;
4151   KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4152 if (team->t.t_serialized) {
4153     KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4158 pr_buf = th->th.th_dispatch;
4159 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4160 num_dims = (size_t)pr_buf->th_doacross_info[0];
4161 lo = pr_buf->th_doacross_info[2];
4162 up = pr_buf->th_doacross_info[3];
4163 st = pr_buf->th_doacross_info[4];
4164 #if OMPT_SUPPORT && OMPT_OPTIONAL
4165 ompt_dependence_t deps[num_dims];
4168   if (vec[0] < lo || vec[0] > up) {
4169     KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4170                   "bounds [%lld,%lld]\n",
4171                   gtid, vec[0], lo, up));
4174   iter_number = vec[0] - lo;
4175   } else if (st > 0) {
4176     if (vec[0] < lo || vec[0] > up) {
4177       KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4178                     "bounds [%lld,%lld]\n",
4179                     gtid, vec[0], lo, up));
4182     iter_number = (kmp_uint64)(vec[0] - lo) / st;
4184     if (vec[0] > lo || vec[0] < up) {
4185       KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4186                     "bounds [%lld,%lld]\n",
4187                     gtid, vec[0], lo, up));
4190     iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4192#if OMPT_SUPPORT && OMPT_OPTIONAL
4193 deps[0].variable.value = iter_number;
4194 deps[0].dependence_type = ompt_dependence_type_sink;
4196 for (i = 1; i < num_dims; ++i) {
4199 ln = pr_buf->th_doacross_info[j + 1];
4200 lo = pr_buf->th_doacross_info[j + 2];
4201 up = pr_buf->th_doacross_info[j + 3];
4202 st = pr_buf->th_doacross_info[j + 4];
4204     if (vec[i] < lo || vec[i] > up) {
4205       KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4206                     "bounds [%lld,%lld]\n",
4207                     gtid, vec[i], lo, up));
4211     } else if (st > 0) {
4212       if (vec[i] < lo || vec[i] > up) {
4213         KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4214                       "bounds [%lld,%lld]\n",
4215                       gtid, vec[i], lo, up));
4218       iter = (kmp_uint64)(vec[i] - lo) / st;
4220       if (vec[i] > lo || vec[i] < up) {
4221         KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4222                       "bounds [%lld,%lld]\n",
4223                       gtid, vec[i], lo, up));
4226       iter = (kmp_uint64)(lo - vec[i]) / (-st);
4228 iter_number = iter + ln * iter_number;
4229#if OMPT_SUPPORT && OMPT_OPTIONAL
4230 deps[i].variable.value = iter;
4231 deps[i].dependence_type = ompt_dependence_type_sink;
4234   shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // word index of this iteration in the flag array
  flag = 1 << shft; // bit of this iteration within that word
4237   while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4241#if OMPT_SUPPORT && OMPT_OPTIONAL
4242 if (ompt_enabled.ompt_callback_dependences) {
4243 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4244 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  KA_TRACE(20,
4248       ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4249        gtid, (iter_number << 5) + shft));
4252 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4253 __kmp_assert_valid_gtid(gtid);
4257 kmp_int64 iter_number;
4258 kmp_info_t *th = __kmp_threads[gtid];
4259 kmp_team_t *team = th->th.th_team;
4263   KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4264 if (team->t.t_serialized) {
4265     KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4271 pr_buf = th->th.th_dispatch;
4272 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4273 num_dims = (size_t)pr_buf->th_doacross_info[0];
4274 lo = pr_buf->th_doacross_info[2];
4275 st = pr_buf->th_doacross_info[4];
4276 #if OMPT_SUPPORT && OMPT_OPTIONAL
4277 ompt_dependence_t deps[num_dims];
4280 iter_number = vec[0] - lo;
4281   } else if (st > 0) {
4282 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4284 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4286#if OMPT_SUPPORT && OMPT_OPTIONAL
4287 deps[0].variable.value = iter_number;
4288 deps[0].dependence_type = ompt_dependence_type_source;
4290 for (i = 1; i < num_dims; ++i) {
4293 ln = pr_buf->th_doacross_info[j + 1];
4294 lo = pr_buf->th_doacross_info[j + 2];
4295 st = pr_buf->th_doacross_info[j + 4];
4298   } else if (st > 0) {
4299 iter = (kmp_uint64)(vec[i] - lo) / st;
4301 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4303 iter_number = iter + ln * iter_number;
4304#if OMPT_SUPPORT && OMPT_OPTIONAL
4305 deps[i].variable.value = iter;
4306 deps[i].dependence_type = ompt_dependence_type_source;
4309#if OMPT_SUPPORT && OMPT_OPTIONAL
4310 if (ompt_enabled.ompt_callback_dependences) {
4311 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4312 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4315   shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // word index of this iteration in the flag array
  flag = 1 << shft; // bit of this iteration within that word
4319 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4320 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4321   KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4322 (iter_number << 5) + shft));
4325 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4326 __kmp_assert_valid_gtid(gtid);
4328 kmp_info_t *th = __kmp_threads[gtid];
4329 kmp_team_t *team = th->th.th_team;
4330 kmp_disp_t *pr_buf = th->th.th_dispatch;
4332   KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4333 if (team->t.t_serialized) {
4334     KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
  num_done =
4338       KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4339 if (num_done == th->th.th_team_nproc) {
4341 int idx = pr_buf->th_doacross_buf_idx - 1;
4342 dispatch_shared_info_t *sh_buf =
4343 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4344 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4345 (kmp_int64)&sh_buf->doacross_num_done);
4346 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4347 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4348 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4349 sh_buf->doacross_flags = NULL;
4350 sh_buf->doacross_num_done = 0;
4351 sh_buf->doacross_buf_idx +=
4352 __kmp_dispatch_num_buffers;
4355 pr_buf->th_doacross_flags = NULL;
4356   __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4357 pr_buf->th_doacross_info = NULL;
4358   KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4362 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4363 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4366 void *omp_aligned_alloc(size_t align, size_t size,
4367                         omp_allocator_handle_t allocator) {
4368 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4371 void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4372 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4375 void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4376                          omp_allocator_handle_t allocator) {
4377 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4380 void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4381                   omp_allocator_handle_t free_allocator) {
4382   return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                           free_allocator);
4386 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4387 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
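/*
 * Illustrative only: these thin wrappers implement the OpenMP 5.x memory
 * allocation API on top of __kmp_alloc/__kmp_calloc/__kmp_realloc/___kmpc_free,
 * e.g.
 *
 *   #include <omp.h>
 *   double *buf =
 *       (double *)omp_alloc(1024 * sizeof(double), omp_high_bw_mem_alloc);
 *   if (buf) {
 *     // ... use buf ...
 *     omp_free(buf, omp_high_bw_mem_alloc);
 *   }
 */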
4391 int __kmpc_get_target_offload(void) {
4392 if (!__kmp_init_serial) {
4393 __kmp_serial_initialize();
4395 return __kmp_target_offload;
4398int __kmpc_pause_resource(kmp_pause_status_t level) {
4399 if (!__kmp_init_serial) {
4402 return __kmp_pause_resource(level);
4405 void __kmpc_error(ident_t *loc, int severity, const char *message) {
4406 if (!__kmp_init_serial)
4407 __kmp_serial_initialize();
4409 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4412 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4413 ompt_callbacks.ompt_callback(ompt_callback_error)(
4414 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4415 OMPT_GET_RETURN_ADDRESS(0));
4421     kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
    src_loc =
4423         __kmp_str_format("%s:%s:%s", str_loc.file, str_loc.line, str_loc.col);
4424 __kmp_str_loc_free(&str_loc);
4426     src_loc = __kmp_str_format("unknown");
4429 if (severity == severity_warning)
4430 KMP_WARNING(UserDirectedWarning, src_loc, message);
4432 KMP_FATAL(UserDirectedError, src_loc, message);
4434 __kmp_str_free(&src_loc);
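/*
 * __kmpc_error backs the OpenMP `error` directive at runtime severity:
 * severity(warning) maps to KMP_WARNING(UserDirectedWarning, ...) and
 * severity(fatal) to KMP_FATAL(UserDirectedError, ...), after the optional
 * OMPT error callback has been dispatched.
 */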
4438 void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4440#if OMPT_SUPPORT && OMPT_OPTIONAL
4441 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4442 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4443 int tid = __kmp_tid_from_gtid(gtid);
4444 ompt_callbacks.ompt_callback(ompt_callback_work)(
4445 ompt_work_scope, ompt_scope_begin,
4446 &(team->t.ompt_team_info.parallel_data),
4447 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4448 OMPT_GET_RETURN_ADDRESS(0));
4454 void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4456#if OMPT_SUPPORT && OMPT_OPTIONAL
4457 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4458 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4459 int tid = __kmp_tid_from_gtid(gtid);
4460 ompt_callbacks.ompt_callback(ompt_callback_work)(
4461 ompt_work_scope, ompt_scope_end,
4462 &(team->t.ompt_team_info.parallel_data),
4463 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4464 OMPT_GET_RETURN_ADDRESS(0));
4469#ifdef KMP_USE_VERSION_SYMBOLS
4478#ifdef omp_set_affinity_format
4479#undef omp_set_affinity_format
4481#ifdef omp_get_affinity_format
4482#undef omp_get_affinity_format
4484#ifdef omp_display_affinity
4485#undef omp_display_affinity
4487#ifdef omp_capture_affinity
4488#undef omp_capture_affinity
4490 KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
                            "OMP_5.0");
4492 KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
                            "OMP_5.0");
4494 KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
                            "OMP_5.0");
4496 KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
                            "OMP_5.0");