1/*
2 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#define __KMP_IMP
14#include "omp.h" /* extern "C" declarations of user-visible routines */
15#include "kmp.h"
16#include "kmp_error.h"
17#include "kmp_i18n.h"
18#include "kmp_itt.h"
19#include "kmp_lock.h"
20#include "kmp_stats.h"
21#include "ompt-specific.h"
22
23#define MAX_MESSAGE 512
24
25// flags will be used in the future, e.g. to implement openmp_strict library
26// restrictions
27
36void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37 // By default, __kmpc_begin() is a no-op.
38 char *env;
39 if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40 __kmp_str_match_true(env)) {
41 __kmp_middle_initialize();
42 __kmp_assign_root_init_mask();
43 KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
44 } else if (__kmp_ignore_mppbeg() == FALSE) {
45 // By default __kmp_ignore_mppbeg() returns TRUE.
46 __kmp_internal_begin();
47 KC_TRACE(10, ("__kmpc_begin: called\n"));
48 }
49}
50
59void __kmpc_end(ident_t *loc) {
60 // By default, __kmp_ignore_mppend() returns TRUE, which makes the
61 // __kmpc_end() call a no-op. However, this can be overridden with the
62 // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
63 // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister
64 // this root (it can cause library shutdown).
65 if (__kmp_ignore_mppend() == FALSE) {
66 KC_TRACE(10, ("__kmpc_end: called\n"));
67 KA_TRACE(30, ("__kmpc_end\n"));
68
69 __kmp_internal_end_thread(-1);
70 }
71#if KMP_OS_WINDOWS && OMPT_SUPPORT
72 // Normal exit process on Windows does not allow worker threads of the final
73 // parallel region to finish reporting their events, so shutting down the
74 // library here fixes the issue at least for the cases where __kmpc_end() is
75 // placed properly.
76 if (ompt_enabled.enabled)
77 __kmp_internal_end_library(__kmp_gtid_get_specific());
78#endif
79}
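/*
  Illustrative sketch (not part of the library, and not the output of any
  particular compiler): a front end that uses these entry points brackets the
  user's program with __kmpc_begin()/__kmpc_end(). The ident_t initializer is
  an assumption made for the example; real compilers emit their own location
  records.

    static ident_t loc_main = {0, KMP_IDENT_KMPC, 0, 0, ";prog.c;main;1;1;;"};

    int main(void) {
      __kmpc_begin(&loc_main, 0); // no-op unless KMP_INITIAL_THREAD_BIND is set
      // ... user code, possibly containing parallel regions ...
      __kmpc_end(&loc_main); // no-op unless KMP_IGNORE_MPPEND=0
      return 0;
    }
*/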
80
99kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
100 kmp_int32 gtid = __kmp_entry_gtid();
101
102 KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
103
104 return gtid;
105}
106
121kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
122 KC_TRACE(10,
123 ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
124
125 return TCR_4(__kmp_all_nth);
126}
127
134kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
135 KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
136 return __kmp_tid_from_gtid(__kmp_entry_gtid());
137}
138
144kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
145 KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
146
147 return __kmp_entry_thread()->th.th_team->t.t_nproc;
148}
149
156kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
157#ifndef KMP_DEBUG
158
159 return TRUE;
160
161#else
162
163 const char *semi2;
164 const char *semi3;
165 int line_no;
166
167 if (__kmp_par_range == 0) {
168 return TRUE;
169 }
170 semi2 = loc->psource;
171 if (semi2 == NULL) {
172 return TRUE;
173 }
174 semi2 = strchr(semi2, ';');
175 if (semi2 == NULL) {
176 return TRUE;
177 }
178 semi2 = strchr(semi2 + 1, ';');
179 if (semi2 == NULL) {
180 return TRUE;
181 }
182 if (__kmp_par_range_filename[0]) {
183 const char *name = semi2 - 1;
184 while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
185 name--;
186 }
187 if ((*name == '/') || (*name == ';')) {
188 name++;
189 }
190 if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
191 return __kmp_par_range < 0;
192 }
193 }
194 semi3 = strchr(semi2 + 1, ';');
195 if (__kmp_par_range_routine[0]) {
196 if ((semi3 != NULL) && (semi3 > semi2) &&
197 (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
198 return __kmp_par_range < 0;
199 }
200 }
201 if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
202 if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
203 return __kmp_par_range > 0;
204 }
205 return __kmp_par_range < 0;
206 }
207 return TRUE;
208
209#endif /* KMP_DEBUG */
210}
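/*
  Note on the parsing above: it relies on the layout of ident_t::psource,
  which is a string of semicolon-separated fields of the form

      ";file;routine;line;column;;"

  so semi2 ends up at the ';' separating the file name from the routine name,
  semi3 at the ';' separating the routine name from the line number, and the
  sscanf call reads the line number. A sketch of a matching KMP_PAR_RANGE
  setting (the exact syntax is an assumption here; see the __kmp_par_range
  parsing in kmp_settings.cpp):

      KMP_PAR_RANGE="routine=foo,filename=bar.c,range=100,200"

  which would make __kmpc_ok_to_fork() approve forking only for parallel
  constructs in bar.c, inside foo(), between lines 100 and 200.
*/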
211
218kmp_int32 __kmpc_in_parallel(ident_t *loc) {
219 return __kmp_entry_thread()->th.th_root->r.r_active;
220}
221
231void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
232 kmp_int32 num_threads) {
233 KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
234 global_tid, num_threads));
235 __kmp_assert_valid_gtid(global_tid);
236 __kmp_push_num_threads(loc, global_tid, num_threads);
237}
238
239void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
240 KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
241 /* the num_threads are automatically popped */
242}
243
244void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245 kmp_int32 proc_bind) {
246 KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247 proc_bind));
248 __kmp_assert_valid_gtid(global_tid);
249 __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250}
251
262void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263 int gtid = __kmp_entry_gtid();
264
265#if (KMP_STATS_ENABLED)
266 // If we were in a serial region, then stop the serial timer, record
267 // the event, and start the parallel region timer
268 stats_state_e previous_state = KMP_GET_THREAD_STATE();
269 if (previous_state == stats_state_e::SERIAL_REGION) {
270 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271 } else {
272 KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273 }
274 int inParallel = __kmpc_in_parallel(loc);
275 if (inParallel) {
276 KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277 } else {
278 KMP_COUNT_BLOCK(OMP_PARALLEL);
279 }
280#endif
281
282 // maybe saving thr_state is enough here
283 {
284 va_list ap;
285 va_start(ap, microtask);
286
287#if OMPT_SUPPORT
288 ompt_frame_t *ompt_frame;
289 if (ompt_enabled.enabled) {
290 kmp_info_t *master_th = __kmp_threads[gtid];
291 ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
292 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
293 }
294 OMPT_STORE_RETURN_ADDRESS(gtid);
295#endif
296
297#if INCLUDE_SSC_MARKS
298 SSC_MARK_FORKING();
299#endif
300 __kmp_fork_call(loc, gtid, fork_context_intel, argc,
301 VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
302 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
303 kmp_va_addr_of(ap));
304#if INCLUDE_SSC_MARKS
305 SSC_MARK_JOINING();
306#endif
307 __kmp_join_call(loc, gtid
308#if OMPT_SUPPORT
309 ,
310 fork_context_intel
311#endif
312 );
313
314 va_end(ap);
315
316#if OMPT_SUPPORT
317 if (ompt_enabled.enabled) {
318 ompt_frame->enter_frame = ompt_data_none;
319 }
320#endif
321 }
322
323#if KMP_STATS_ENABLED
324 if (previous_state == stats_state_e::SERIAL_REGION) {
325 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
326 KMP_SET_THREAD_STATE(previous_state);
327 } else {
328 KMP_POP_PARTITIONED_TIMER();
329 }
330#endif // KMP_STATS_ENABLED
331}
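/*
  Illustrative sketch of how a compiler typically lowers

      #pragma omp parallel shared(n)
      { body(n); }

  onto __kmpc_fork_call(). The outlined function name, the ident_t
  initializer and the argument layout are assumptions made for the example,
  not the exact code any particular front end emits:

      static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file.c;func;10;1;;"};

      // microtasks take two extra leading parameters: global tid, bound tid
      static void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *n) {
        body(*n);
      }

      void lowered(int n) {
        // argc == 1: one captured argument follows the microtask pointer
        __kmpc_fork_call(&loc, 1, (kmpc_micro)&outlined, &n);
      }

  __kmpc_fork_call() forks (or reuses) the team, runs the microtask once in
  every thread of the team, and performs the join barrier before returning.
*/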
332
344void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
345 kmp_int32 num_teams, kmp_int32 num_threads) {
346 KA_TRACE(20,
347 ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
348 global_tid, num_teams, num_threads));
349 __kmp_assert_valid_gtid(global_tid);
350 __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
351}
352
369void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
370 kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
371 kmp_int32 num_threads) {
372 KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
373 " num_teams_ub=%d num_threads=%d\n",
374 global_tid, num_teams_lb, num_teams_ub, num_threads));
375 __kmp_assert_valid_gtid(global_tid);
376 __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
377 num_threads);
378}
379
390void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
391 ...) {
392 int gtid = __kmp_entry_gtid();
393 kmp_info_t *this_thr = __kmp_threads[gtid];
394 va_list ap;
395 va_start(ap, microtask);
396
397#if KMP_STATS_ENABLED
398 KMP_COUNT_BLOCK(OMP_TEAMS);
399 stats_state_e previous_state = KMP_GET_THREAD_STATE();
400 if (previous_state == stats_state_e::SERIAL_REGION) {
401 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
402 } else {
403 KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
404 }
405#endif
406
407 // remember teams entry point and nesting level
408 this_thr->th.th_teams_microtask = microtask;
409 this_thr->th.th_teams_level =
410 this_thr->th.th_team->t.t_level; // AC: can be >0 on host
411
412#if OMPT_SUPPORT
413 kmp_team_t *parent_team = this_thr->th.th_team;
414 int tid = __kmp_tid_from_gtid(gtid);
415 if (ompt_enabled.enabled) {
416 parent_team->t.t_implicit_task_taskdata[tid]
417 .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
418 }
419 OMPT_STORE_RETURN_ADDRESS(gtid);
420#endif
421
422 // check if __kmpc_push_num_teams was called; set the default number of
423 // teams otherwise
424 if (this_thr->th.th_teams_size.nteams == 0) {
425 __kmp_push_num_teams(loc, gtid, 0, 0);
426 }
427 KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
428 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
429 KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
430
431 __kmp_fork_call(
432 loc, gtid, fork_context_intel, argc,
433 VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
434 VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
435 __kmp_join_call(loc, gtid
436#if OMPT_SUPPORT
437 ,
438 fork_context_intel
439#endif
440 );
441
442 // Pop current CG root off list
443 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
444 kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
445 this_thr->th.th_cg_roots = tmp->up;
446 KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
447 " to node %p. cg_nthreads was %d\n",
448 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
449 KMP_DEBUG_ASSERT(tmp->cg_nthreads);
450 int i = tmp->cg_nthreads--;
451 if (i == 1) { // check if we are the last thread in CG (not always the case)
452 __kmp_free(tmp);
453 }
454 // Restore current task's thread_limit from CG root
455 KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
456 this_thr->th.th_current_task->td_icvs.thread_limit =
457 this_thr->th.th_cg_roots->cg_thread_limit;
458
459 this_thr->th.th_teams_microtask = NULL;
460 this_thr->th.th_teams_level = 0;
461 *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
462 va_end(ap);
463#if KMP_STATS_ENABLED
464 if (previous_state == stats_state_e::SERIAL_REGION) {
465 KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
466 KMP_SET_THREAD_STATE(previous_state);
467 } else {
468 KMP_POP_PARTITIONED_TIMER();
469 }
470#endif // KMP_STATS_ENABLED
471}
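/*
  Illustrative sketch of the call sequence a compiler might emit for

      #pragma omp teams num_teams(4) thread_limit(8)
      { teams_body(); }

  (function names and the ident_t initializer are assumptions made for the
  example):

      static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";file.c;func;20;1;;"};

      static void teams_outlined(kmp_int32 *gtid, kmp_int32 *btid) {
        teams_body(); // runs once in the initial thread of each team
      }

      void lowered(void) {
        kmp_int32 gtid = __kmpc_global_thread_num(&loc);
        __kmpc_push_num_teams(&loc, gtid, 4, 8); // num_teams=4, num_threads=8
        __kmpc_fork_teams(&loc, 0, (kmpc_micro)&teams_outlined);
      }

  If __kmpc_push_num_teams() is not called first, the nteams == 0 check above
  makes __kmpc_fork_teams() fall back to the default team sizes.
*/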
472
473// I don't think this function should ever have been exported.
474// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
475// openmp code ever called it, but it's been exported from the RTL for so
476// long that I'm afraid to remove the definition.
477int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
478
491void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
492 // The implementation is now in kmp_runtime.cpp so that it can share static
493 // functions with kmp_fork_call since the tasks to be done are similar in
494 // each case.
495 __kmp_assert_valid_gtid(global_tid);
496#if OMPT_SUPPORT
497 OMPT_STORE_RETURN_ADDRESS(global_tid);
498#endif
499 __kmp_serialized_parallel(loc, global_tid);
500}
501
509void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
510 kmp_internal_control_t *top;
511 kmp_info_t *this_thr;
512 kmp_team_t *serial_team;
513
514 KC_TRACE(10,
515 ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
516
517 /* skip all this code for autopar serialized loops since it results in
518 unacceptable overhead */
519 if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
520 return;
521
522 // Not autopar code
523 __kmp_assert_valid_gtid(global_tid);
524 if (!TCR_4(__kmp_init_parallel))
525 __kmp_parallel_initialize();
526
527 __kmp_resume_if_soft_paused();
528
529 this_thr = __kmp_threads[global_tid];
530 serial_team = this_thr->th.th_serial_team;
531
532 kmp_task_team_t *task_team = this_thr->th.th_task_team;
533 // we need to wait for the proxy tasks before finishing the thread
534 if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
535 task_team->tt.tt_hidden_helper_task_encountered))
536 __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
537
538 KMP_MB();
539 KMP_DEBUG_ASSERT(serial_team);
540 KMP_ASSERT(serial_team->t.t_serialized);
541 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
542 KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
543 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
544 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
545
546#if OMPT_SUPPORT
547 if (ompt_enabled.enabled &&
548 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
549 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
550 if (ompt_enabled.ompt_callback_implicit_task) {
551 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
552 ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
553 OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
554 }
555
556 // reset/clear the task id only after unlinking the task
557 ompt_data_t *parent_task_data;
558 __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
559
560 if (ompt_enabled.ompt_callback_parallel_end) {
561 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
562 &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
563 ompt_parallel_invoker_program | ompt_parallel_team,
564 OMPT_LOAD_RETURN_ADDRESS(global_tid));
565 }
566 __ompt_lw_taskteam_unlink(this_thr);
567 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
568 }
569#endif
570
571 /* If necessary, pop the internal control stack values and replace the team
572 * values */
573 top = serial_team->t.t_control_stack_top;
574 if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
575 copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
576 serial_team->t.t_control_stack_top = top->next;
577 __kmp_free(top);
578 }
579
580 /* pop dispatch buffers stack */
581 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
582 {
583 dispatch_private_info_t *disp_buffer =
584 serial_team->t.t_dispatch->th_disp_buffer;
585 serial_team->t.t_dispatch->th_disp_buffer =
586 serial_team->t.t_dispatch->th_disp_buffer->next;
587 __kmp_free(disp_buffer);
588 }
589 this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
590
591 --serial_team->t.t_serialized;
592 if (serial_team->t.t_serialized == 0) {
593
594 /* return to the parallel section */
595
596#if KMP_ARCH_X86 || KMP_ARCH_X86_64
597 if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
598 __kmp_clear_x87_fpu_status_word();
599 __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
600 __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
601 }
602#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
603
604 __kmp_pop_current_task_from_thread(this_thr);
605#if OMPD_SUPPORT
606 if (ompd_state & OMPD_ENABLE_BP)
607 ompd_bp_parallel_end();
608#endif
609
610 this_thr->th.th_team = serial_team->t.t_parent;
611 this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
612
613 /* restore values cached in the thread */
614 this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
615 this_thr->th.th_team_master =
616 serial_team->t.t_parent->t.t_threads[0]; /* JPH */
617 this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
618
619 /* TODO the below shouldn't need to be adjusted for serialized teams */
620 this_thr->th.th_dispatch =
621 &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
622
623 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
624 this_thr->th.th_current_task->td_flags.executing = 1;
625
626 if (__kmp_tasking_mode != tskm_immediate_exec) {
627 // Copy the task team from the new child / old parent team to the thread.
628 this_thr->th.th_task_team =
629 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
630 KA_TRACE(20,
631 ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
632 "team %p\n",
633 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
634 }
635 } else {
636 if (__kmp_tasking_mode != tskm_immediate_exec) {
637 KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
638 "depth of serial team %p to %d\n",
639 global_tid, serial_team, serial_team->t.t_serialized));
640 }
641 }
642
643 serial_team->t.t_level--;
644 if (__kmp_env_consistency_check)
645 __kmp_pop_parallel(global_tid, NULL);
646#if OMPT_SUPPORT
647 if (ompt_enabled.enabled)
648 this_thr->th.ompt_thread_info.state =
649 ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
650 : ompt_state_work_parallel);
651#endif
652}
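/*
  Illustrative sketch: a parallel construct whose if() clause evaluates to
  false is executed by the encountering thread alone. A front end can emit
  the pair below directly around the outlined body (names follow the
  __kmpc_fork_call sketch earlier and are assumptions for the example):

      void lowered_if_false(int n) {
        kmp_int32 gtid = __kmpc_global_thread_num(&loc);
        __kmpc_serialized_parallel(&loc, gtid);
        kmp_int32 btid = 0;
        outlined(&gtid, &btid, &n); // body runs on the encountering thread
        __kmpc_end_serialized_parallel(&loc, gtid);
      }
*/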
653
662void __kmpc_flush(ident_t *loc) {
663 KC_TRACE(10, ("__kmpc_flush: called\n"));
664
665 /* need an explicit __mf() here since we use volatile instead in the library */
666 KMP_MB(); /* Flush all pending memory write invalidates. */
667
668#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
669#if KMP_MIC
670// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
671// We shouldn't need it, though, since the ABI rules require that
672// * If the compiler generates NGO stores it also generates the fence
673// * If users hand-code NGO stores they should insert the fence
674// therefore no incomplete unordered stores should be visible.
675#else
676 // C74404
677 // This is to address non-temporal store instructions (sfence needed).
678 // The clflush instruction is also addressed (mfence needed).
679 // Probably the non-temporal load movntdqa instruction should also be
680 // addressed.
681 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
682 if (!__kmp_cpuinfo.initialized) {
683 __kmp_query_cpuid(&__kmp_cpuinfo);
684 }
685 if (!__kmp_cpuinfo.flags.sse2) {
686 // CPU cannot execute SSE2 instructions.
687 } else {
688#if KMP_COMPILER_ICC
689 _mm_mfence();
690#elif KMP_COMPILER_MSVC
691 MemoryBarrier();
692#else
693 __sync_synchronize();
694#endif // KMP_COMPILER_ICC
695 }
696#endif // KMP_MIC
697#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
698 KMP_ARCH_RISCV64)
699// Nothing to see here move along
700#elif KMP_ARCH_PPC64
701// Nothing needed here (we have a real MB above).
702#else
703#error Unknown or unsupported architecture
704#endif
705
706#if OMPT_SUPPORT && OMPT_OPTIONAL
707 if (ompt_enabled.ompt_callback_flush) {
708 ompt_callbacks.ompt_callback(ompt_callback_flush)(
709 __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
710 }
711#endif
712}
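/*
  Illustrative sketch: "#pragma omp flush" lowers to a single call at the
  flush point (loc is a compiler-emitted location record, as in the earlier
  sketches):

      __kmpc_flush(&loc); // memory fence on architectures that require one
*/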
713
714/* -------------------------------------------------------------------------- */
722void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
723 KMP_COUNT_BLOCK(OMP_BARRIER);
724 KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
725 __kmp_assert_valid_gtid(global_tid);
726
727 if (!TCR_4(__kmp_init_parallel))
728 __kmp_parallel_initialize();
729
730 __kmp_resume_if_soft_paused();
731
732 if (__kmp_env_consistency_check) {
733 if (loc == 0) {
734 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
735 }
736 __kmp_check_barrier(global_tid, ct_barrier, loc);
737 }
738
739#if OMPT_SUPPORT
740 ompt_frame_t *ompt_frame;
741 if (ompt_enabled.enabled) {
742 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
743 if (ompt_frame->enter_frame.ptr == NULL)
744 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
745 }
746 OMPT_STORE_RETURN_ADDRESS(global_tid);
747#endif
748 __kmp_threads[global_tid]->th.th_ident = loc;
749 // TODO: explicit barrier_wait_id:
750 // this function is called when the 'barrier' directive is present or at the
751 // implicit barrier at the end of a worksharing construct.
752 // 1) better to add a per-thread barrier counter to a thread data structure
753 // 2) set to 0 when a new team is created
754 // 3) no sync is required
755
756 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
757#if OMPT_SUPPORT && OMPT_OPTIONAL
758 if (ompt_enabled.enabled) {
759 ompt_frame->enter_frame = ompt_data_none;
760 }
761#endif
762}
763
764/* The BARRIER for a MASTER section is always explicit */
771kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
772 int status = 0;
773
774 KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
775 __kmp_assert_valid_gtid(global_tid);
776
777 if (!TCR_4(__kmp_init_parallel))
778 __kmp_parallel_initialize();
779
780 __kmp_resume_if_soft_paused();
781
782 if (KMP_MASTER_GTID(global_tid)) {
783 KMP_COUNT_BLOCK(OMP_MASTER);
784 KMP_PUSH_PARTITIONED_TIMER(OMP_master);
785 status = 1;
786 }
787
788#if OMPT_SUPPORT && OMPT_OPTIONAL
789 if (status) {
790 if (ompt_enabled.ompt_callback_masked) {
791 kmp_info_t *this_thr = __kmp_threads[global_tid];
792 kmp_team_t *team = this_thr->th.th_team;
793
794 int tid = __kmp_tid_from_gtid(global_tid);
795 ompt_callbacks.ompt_callback(ompt_callback_masked)(
796 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
797 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
798 OMPT_GET_RETURN_ADDRESS(0));
799 }
800 }
801#endif
802
803 if (__kmp_env_consistency_check) {
804#if KMP_USE_DYNAMIC_LOCK
805 if (status)
806 __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
807 else
808 __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
809#else
810 if (status)
811 __kmp_push_sync(global_tid, ct_master, loc, NULL);
812 else
813 __kmp_check_sync(global_tid, ct_master, loc, NULL);
814#endif
815 }
816
817 return status;
818}
819
828void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
829 KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
830 __kmp_assert_valid_gtid(global_tid);
831 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
832 KMP_POP_PARTITIONED_TIMER();
833
834#if OMPT_SUPPORT && OMPT_OPTIONAL
835 kmp_info_t *this_thr = __kmp_threads[global_tid];
836 kmp_team_t *team = this_thr->th.th_team;
837 if (ompt_enabled.ompt_callback_masked) {
838 int tid = __kmp_tid_from_gtid(global_tid);
839 ompt_callbacks.ompt_callback(ompt_callback_masked)(
840 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
841 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
842 OMPT_GET_RETURN_ADDRESS(0));
843 }
844#endif
845
846 if (__kmp_env_consistency_check) {
847 if (KMP_MASTER_GTID(global_tid))
848 __kmp_pop_sync(global_tid, ct_master, loc);
849 }
850}
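/*
  Illustrative sketch of the guard a compiler emits for "#pragma omp master"
  (loc and gtid obtained as in the earlier sketches):

      if (__kmpc_master(&loc, gtid)) {
        // ... structured block, executed by the primary thread only ...
        __kmpc_end_master(&loc, gtid);
      }
      // no implied barrier at the end of a master construct

  Only the thread for which __kmpc_master() returned 1 may reach
  __kmpc_end_master(), which matches the assertion above.
*/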
851
860kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
861 int status = 0;
862 int tid;
863 KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
864 __kmp_assert_valid_gtid(global_tid);
865
866 if (!TCR_4(__kmp_init_parallel))
867 __kmp_parallel_initialize();
868
869 __kmp_resume_if_soft_paused();
870
871 tid = __kmp_tid_from_gtid(global_tid);
872 if (tid == filter) {
873 KMP_COUNT_BLOCK(OMP_MASKED);
874 KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
875 status = 1;
876 }
877
878#if OMPT_SUPPORT && OMPT_OPTIONAL
879 if (status) {
880 if (ompt_enabled.ompt_callback_masked) {
881 kmp_info_t *this_thr = __kmp_threads[global_tid];
882 kmp_team_t *team = this_thr->th.th_team;
883 ompt_callbacks.ompt_callback(ompt_callback_masked)(
884 ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
885 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
886 OMPT_GET_RETURN_ADDRESS(0));
887 }
888 }
889#endif
890
891 if (__kmp_env_consistency_check) {
892#if KMP_USE_DYNAMIC_LOCK
893 if (status)
894 __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
895 else
896 __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
897#else
898 if (status)
899 __kmp_push_sync(global_tid, ct_masked, loc, NULL);
900 else
901 __kmp_check_sync(global_tid, ct_masked, loc, NULL);
902#endif
903 }
904
905 return status;
906}
907
916void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
917 KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
918 __kmp_assert_valid_gtid(global_tid);
919 KMP_POP_PARTITIONED_TIMER();
920
921#if OMPT_SUPPORT && OMPT_OPTIONAL
922 kmp_info_t *this_thr = __kmp_threads[global_tid];
923 kmp_team_t *team = this_thr->th.th_team;
924 if (ompt_enabled.ompt_callback_masked) {
925 int tid = __kmp_tid_from_gtid(global_tid);
926 ompt_callbacks.ompt_callback(ompt_callback_masked)(
927 ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
928 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
929 OMPT_GET_RETURN_ADDRESS(0));
930 }
931#endif
932
933 if (__kmp_env_consistency_check) {
934 __kmp_pop_sync(global_tid, ct_masked, loc);
935 }
936}
937
945void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
946 int cid = 0;
947 kmp_info_t *th;
948 KMP_DEBUG_ASSERT(__kmp_init_serial);
949
950 KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
951 __kmp_assert_valid_gtid(gtid);
952
953 if (!TCR_4(__kmp_init_parallel))
954 __kmp_parallel_initialize();
955
956 __kmp_resume_if_soft_paused();
957
958#if USE_ITT_BUILD
959 __kmp_itt_ordered_prep(gtid);
960// TODO: ordered_wait_id
961#endif /* USE_ITT_BUILD */
962
963 th = __kmp_threads[gtid];
964
965#if OMPT_SUPPORT && OMPT_OPTIONAL
966 kmp_team_t *team;
967 ompt_wait_id_t lck;
968 void *codeptr_ra;
969 OMPT_STORE_RETURN_ADDRESS(gtid);
970 if (ompt_enabled.enabled) {
971 team = __kmp_team_from_gtid(gtid);
972 lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
973 /* OMPT state update */
974 th->th.ompt_thread_info.wait_id = lck;
975 th->th.ompt_thread_info.state = ompt_state_wait_ordered;
976
977 /* OMPT event callback */
978 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
979 if (ompt_enabled.ompt_callback_mutex_acquire) {
980 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
981 ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
982 codeptr_ra);
983 }
984 }
985#endif
986
987 if (th->th.th_dispatch->th_deo_fcn != 0)
988 (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
989 else
990 __kmp_parallel_deo(&gtid, &cid, loc);
991
992#if OMPT_SUPPORT && OMPT_OPTIONAL
993 if (ompt_enabled.enabled) {
994 /* OMPT state update */
995 th->th.ompt_thread_info.state = ompt_state_work_parallel;
996 th->th.ompt_thread_info.wait_id = 0;
997
998 /* OMPT event callback */
999 if (ompt_enabled.ompt_callback_mutex_acquired) {
1000 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1001 ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1002 }
1003 }
1004#endif
1005
1006#if USE_ITT_BUILD
1007 __kmp_itt_ordered_start(gtid);
1008#endif /* USE_ITT_BUILD */
1009}
1010
1018void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1019 int cid = 0;
1020 kmp_info_t *th;
1021
1022 KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1023 __kmp_assert_valid_gtid(gtid);
1024
1025#if USE_ITT_BUILD
1026 __kmp_itt_ordered_end(gtid);
1027// TODO: ordered_wait_id
1028#endif /* USE_ITT_BUILD */
1029
1030 th = __kmp_threads[gtid];
1031
1032 if (th->th.th_dispatch->th_dxo_fcn != 0)
1033 (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1034 else
1035 __kmp_parallel_dxo(&gtid, &cid, loc);
1036
1037#if OMPT_SUPPORT && OMPT_OPTIONAL
1038 OMPT_STORE_RETURN_ADDRESS(gtid);
1039 if (ompt_enabled.ompt_callback_mutex_released) {
1040 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1041 ompt_mutex_ordered,
1042 (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1043 ->t.t_ordered.dt.t_value,
1044 OMPT_LOAD_RETURN_ADDRESS(gtid));
1045 }
1046#endif
1047}
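/*
  Illustrative sketch of how an ordered region is bracketed inside the body
  of a loop with the ordered clause (the dispatch setup of the enclosing
  "#pragma omp for ordered" via the __kmpc_dispatch_* entry points is assumed
  and not shown):

      __kmpc_ordered(&loc, gtid);
      // ... structured block of "#pragma omp ordered", run in iteration order
      __kmpc_end_ordered(&loc, gtid);
*/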
1048
1049#if KMP_USE_DYNAMIC_LOCK
1050
1051static __forceinline void
1052__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1053 kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1054 // Pointer to the allocated indirect lock is written to crit, while indexing
1055 // is ignored.
1056 void *idx;
1057 kmp_indirect_lock_t **lck;
1058 lck = (kmp_indirect_lock_t **)crit;
1059 kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1060 KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1061 KMP_SET_I_LOCK_LOCATION(ilk, loc);
1062 KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1063 KA_TRACE(20,
1064 ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1065#if USE_ITT_BUILD
1066 __kmp_itt_critical_creating(ilk->lock, loc);
1067#endif
1068 int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1069 if (status == 0) {
1070#if USE_ITT_BUILD
1071 __kmp_itt_critical_destroyed(ilk->lock);
1072#endif
1073 // We don't really need to destroy the unclaimed lock here since it will be
1074 // cleaned up at program exit.
1075 // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1076 }
1077 KMP_DEBUG_ASSERT(*lck != NULL);
1078}
1079
1080// Fast-path acquire tas lock
1081#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1082 { \
1083 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1084 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1085 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1086 if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1087 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1088 kmp_uint32 spins; \
1089 KMP_FSYNC_PREPARE(l); \
1090 KMP_INIT_YIELD(spins); \
1091 kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1092 do { \
1093 if (TCR_4(__kmp_nth) > \
1094 (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1095 KMP_YIELD(TRUE); \
1096 } else { \
1097 KMP_YIELD_SPIN(spins); \
1098 } \
1099 __kmp_spin_backoff(&backoff); \
1100 } while ( \
1101 KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1102 !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1103 } \
1104 KMP_FSYNC_ACQUIRED(l); \
1105 }
1106
1107// Fast-path test tas lock
1108#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1109 { \
1110 kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1111 kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1112 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1113 rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1114 __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1115 }
1116
1117// Fast-path release tas lock
1118#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1119 { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1120
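/*
  The three macros above inline the test-and-set ("tas") lock fast path: an
  acquire-CAS from "free" to gtid + 1, a spin with yielding and exponential
  backoff while contended, and a release store back to "free". A minimal
  standalone sketch of the same protocol in C11 atomics (simplified on
  purpose: no oversubscription check, no tuned backoff):

      #include <sched.h>
      #include <stdatomic.h>

      typedef struct { atomic_int poll; } mini_tas_lock_t; // 0 == free

      static void mini_tas_acquire(mini_tas_lock_t *l, int gtid) {
        int free_val = 0;
        while (atomic_load_explicit(&l->poll, memory_order_relaxed) != 0 ||
               !atomic_compare_exchange_weak_explicit(
                   &l->poll, &free_val, gtid + 1,
                   memory_order_acquire, memory_order_relaxed)) {
          free_val = 0;  // a failed CAS overwrites the expected value
          sched_yield(); // stand-in for KMP_YIELD / __kmp_spin_backoff
        }
      }

      static void mini_tas_release(mini_tas_lock_t *l) {
        atomic_store_explicit(&l->poll, 0, memory_order_release);
      }
*/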
1121#if KMP_USE_FUTEX
1122
1123#include <sys/syscall.h>
1124#include <unistd.h>
1125#ifndef FUTEX_WAIT
1126#define FUTEX_WAIT 0
1127#endif
1128#ifndef FUTEX_WAKE
1129#define FUTEX_WAKE 1
1130#endif
1131
1132// Fast-path acquire futex lock
1133#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1134 { \
1135 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1136 kmp_int32 gtid_code = (gtid + 1) << 1; \
1137 KMP_MB(); \
1138 KMP_FSYNC_PREPARE(ftx); \
1139 kmp_int32 poll_val; \
1140 while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1141 &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1142 KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1143 kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1144 if (!cond) { \
1145 if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1146 poll_val | \
1147 KMP_LOCK_BUSY(1, futex))) { \
1148 continue; \
1149 } \
1150 poll_val |= KMP_LOCK_BUSY(1, futex); \
1151 } \
1152 kmp_int32 rc; \
1153 if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1154 NULL, NULL, 0)) != 0) { \
1155 continue; \
1156 } \
1157 gtid_code |= 1; \
1158 } \
1159 KMP_FSYNC_ACQUIRED(ftx); \
1160 }
1161
1162// Fast-path test futex lock
1163#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1164 { \
1165 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1166 if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1167 KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
1168 KMP_FSYNC_ACQUIRED(ftx); \
1169 rc = TRUE; \
1170 } else { \
1171 rc = FALSE; \
1172 } \
1173 }
1174
1175// Fast-path release futex lock
1176#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1177 { \
1178 kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1179 KMP_MB(); \
1180 KMP_FSYNC_RELEASING(ftx); \
1181 kmp_int32 poll_val = \
1182 KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1183 if (KMP_LOCK_STRIP(poll_val) & 1) { \
1184 syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1185 KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1186 } \
1187 KMP_MB(); \
1188 KMP_YIELD_OVERSUB(); \
1189 }
1190
1191#endif // KMP_USE_FUTEX
1192
1193#else // KMP_USE_DYNAMIC_LOCK
1194
1195static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1196 ident_t const *loc,
1197 kmp_int32 gtid) {
1198 kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1199
1200 // Because of the double-check, the following load doesn't need to be volatile
1201 kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1202
1203 if (lck == NULL) {
1204 void *idx;
1205
1206 // Allocate & initialize the lock.
1207 // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1208 lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1209 __kmp_init_user_lock_with_checks(lck);
1210 __kmp_set_user_lock_location(lck, loc);
1211#if USE_ITT_BUILD
1212 __kmp_itt_critical_creating(lck);
1213// __kmp_itt_critical_creating() should be called *before* the first usage
1214// of the underlying lock. It is the only place where we can guarantee it.
1215// There is a chance the lock will be destroyed with no usage, but that is
1216// not a problem: this is not a real event seen by the user, just the setting
1217// of a name for an object (the lock). See more details in kmp_itt.h.
1218#endif /* USE_ITT_BUILD */
1219
1220 // Use a cmpxchg instruction to slam the start of the critical section with
1221 // the lock pointer. If another thread beat us to it, deallocate the lock,
1222 // and use the lock that the other thread allocated.
1223 int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1224
1225 if (status == 0) {
1226// Deallocate the lock and reload the value.
1227#if USE_ITT_BUILD
1228 __kmp_itt_critical_destroyed(lck);
1229// Let ITT know the lock is destroyed and the same memory location may be reused
1230// for another purpose.
1231#endif /* USE_ITT_BUILD */
1232 __kmp_destroy_user_lock_with_checks(lck);
1233 __kmp_user_lock_free(&idx, gtid, lck);
1234 lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1235 KMP_DEBUG_ASSERT(lck != NULL);
1236 }
1237 }
1238 return lck;
1239}
1240
1241#endif // KMP_USE_DYNAMIC_LOCK
1242
1253void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1254 kmp_critical_name *crit) {
1255#if KMP_USE_DYNAMIC_LOCK
1256#if OMPT_SUPPORT && OMPT_OPTIONAL
1257 OMPT_STORE_RETURN_ADDRESS(global_tid);
1258#endif // OMPT_SUPPORT
1259 __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1260#else
1261 KMP_COUNT_BLOCK(OMP_CRITICAL);
1262#if OMPT_SUPPORT && OMPT_OPTIONAL
1263 ompt_state_t prev_state = ompt_state_undefined;
1264 ompt_thread_info_t ti;
1265#endif
1266 kmp_user_lock_p lck;
1267
1268 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1269 __kmp_assert_valid_gtid(global_tid);
1270
1271 // TODO: add THR_OVHD_STATE
1272
1273 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1274 KMP_CHECK_USER_LOCK_INIT();
1275
1276 if ((__kmp_user_lock_kind == lk_tas) &&
1277 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1278 lck = (kmp_user_lock_p)crit;
1279 }
1280#if KMP_USE_FUTEX
1281 else if ((__kmp_user_lock_kind == lk_futex) &&
1282 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1283 lck = (kmp_user_lock_p)crit;
1284 }
1285#endif
1286 else { // ticket, queuing or drdpa
1287 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1288 }
1289
1290 if (__kmp_env_consistency_check)
1291 __kmp_push_sync(global_tid, ct_critical, loc, lck);
1292
1293 // Since the critical directive binds to all threads, not just the current
1294 // team, we have to check this even if we are in a serialized team.
1295 // Also, even if we are the uber thread, we still have to acquire the lock,
1296 // as we may contend with sibling threads.
1297
1298#if USE_ITT_BUILD
1299 __kmp_itt_critical_acquiring(lck);
1300#endif /* USE_ITT_BUILD */
1301#if OMPT_SUPPORT && OMPT_OPTIONAL
1302 OMPT_STORE_RETURN_ADDRESS(global_tid);
1303 void *codeptr_ra = NULL;
1304 if (ompt_enabled.enabled) {
1305 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1306 /* OMPT state update */
1307 prev_state = ti.state;
1308 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1309 ti.state = ompt_state_wait_critical;
1310
1311 /* OMPT event callback */
1312 codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1313 if (ompt_enabled.ompt_callback_mutex_acquire) {
1314 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1315 ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1316 (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1317 }
1318 }
1319#endif
1320 // The value of 'crit' should be usable as a critical_id of the critical
1321 // section directive.
1322 __kmp_acquire_user_lock_with_checks(lck, global_tid);
1323
1324#if USE_ITT_BUILD
1325 __kmp_itt_critical_acquired(lck);
1326#endif /* USE_ITT_BUILD */
1327#if OMPT_SUPPORT && OMPT_OPTIONAL
1328 if (ompt_enabled.enabled) {
1329 /* OMPT state update */
1330 ti.state = prev_state;
1331 ti.wait_id = 0;
1332
1333 /* OMPT event callback */
1334 if (ompt_enabled.ompt_callback_mutex_acquired) {
1335 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1336 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1337 }
1338 }
1339#endif
1340 KMP_POP_PARTITIONED_TIMER();
1341
1342 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1343 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1344#endif // KMP_USE_DYNAMIC_LOCK
1345}
1346
1347#if KMP_USE_DYNAMIC_LOCK
1348
1349// Converts the given hint to an internal lock implementation
1350static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1351#if KMP_USE_TSX
1352#define KMP_TSX_LOCK(seq) lockseq_##seq
1353#else
1354#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1355#endif
1356
1357#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1358#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1359#else
1360#define KMP_CPUINFO_RTM 0
1361#endif
1362
1363 // Hints that do not require further logic
1364 if (hint & kmp_lock_hint_hle)
1365 return KMP_TSX_LOCK(hle);
1366 if (hint & kmp_lock_hint_rtm)
1367 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1368 if (hint & kmp_lock_hint_adaptive)
1369 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1370
1371 // Rule out conflicting hints first by returning the default lock
1372 if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1373 return __kmp_user_lock_seq;
1374 if ((hint & omp_lock_hint_speculative) &&
1375 (hint & omp_lock_hint_nonspeculative))
1376 return __kmp_user_lock_seq;
1377
1378 // Do not even consider speculation when it appears to be contended
1379 if (hint & omp_lock_hint_contended)
1380 return lockseq_queuing;
1381
1382 // Uncontended lock without speculation
1383 if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1384 return lockseq_tas;
1385
1386 // Use RTM lock for speculation
1387 if (hint & omp_lock_hint_speculative)
1388 return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1389
1390 return __kmp_user_lock_seq;
1391}
1392
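/*
  Illustrative sketch of the user-level path that reaches this mapping: the
  OpenMP lock-hint API selects an implementation through
  __kmp_map_hint_to_lock(). The mapping stated in the comment below follows
  from the code above; the program itself is an assumption for the example:

      #include <omp.h>

      int main(void) {
        omp_lock_t lk;
        // a "contended" hint maps to the queuing lock on this code path
        omp_init_lock_with_hint(&lk, omp_lock_hint_contended);
        #pragma omp parallel
        {
          omp_set_lock(&lk);
          // ... critical work ...
          omp_unset_lock(&lk);
        }
        omp_destroy_lock(&lk);
        return 0;
      }

  Conflicting hints (e.g. contended | uncontended) fall back to the default
  lock, and speculative hints only take effect when the CPU supports TSX.
*/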
1393#if OMPT_SUPPORT && OMPT_OPTIONAL
1394#if KMP_USE_DYNAMIC_LOCK
1395static kmp_mutex_impl_t
1396__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1397 if (user_lock) {
1398 switch (KMP_EXTRACT_D_TAG(user_lock)) {
1399 case 0:
1400 break;
1401#if KMP_USE_FUTEX
1402 case locktag_futex:
1403 return kmp_mutex_impl_queuing;
1404#endif
1405 case locktag_tas:
1406 return kmp_mutex_impl_spin;
1407#if KMP_USE_TSX
1408 case locktag_hle:
1409 case locktag_rtm_spin:
1410 return kmp_mutex_impl_speculative;
1411#endif
1412 default:
1413 return kmp_mutex_impl_none;
1414 }
1415 ilock = KMP_LOOKUP_I_LOCK(user_lock);
1416 }
1417 KMP_ASSERT(ilock);
1418 switch (ilock->type) {
1419#if KMP_USE_TSX
1420 case locktag_adaptive:
1421 case locktag_rtm_queuing:
1422 return kmp_mutex_impl_speculative;
1423#endif
1424 case locktag_nested_tas:
1425 return kmp_mutex_impl_spin;
1426#if KMP_USE_FUTEX
1427 case locktag_nested_futex:
1428#endif
1429 case locktag_ticket:
1430 case locktag_queuing:
1431 case locktag_drdpa:
1432 case locktag_nested_ticket:
1433 case locktag_nested_queuing:
1434 case locktag_nested_drdpa:
1435 return kmp_mutex_impl_queuing;
1436 default:
1437 return kmp_mutex_impl_none;
1438 }
1439}
1440#else
1441// For locks without dynamic binding
1442static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1443 switch (__kmp_user_lock_kind) {
1444 case lk_tas:
1445 return kmp_mutex_impl_spin;
1446#if KMP_USE_FUTEX
1447 case lk_futex:
1448#endif
1449 case lk_ticket:
1450 case lk_queuing:
1451 case lk_drdpa:
1452 return kmp_mutex_impl_queuing;
1453#if KMP_USE_TSX
1454 case lk_hle:
1455 case lk_rtm_queuing:
1456 case lk_rtm_spin:
1457 case lk_adaptive:
1458 return kmp_mutex_impl_speculative;
1459#endif
1460 default:
1461 return kmp_mutex_impl_none;
1462 }
1463}
1464#endif // KMP_USE_DYNAMIC_LOCK
1465#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1466
1480void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1481 kmp_critical_name *crit, uint32_t hint) {
1482 KMP_COUNT_BLOCK(OMP_CRITICAL);
1483 kmp_user_lock_p lck;
1484#if OMPT_SUPPORT && OMPT_OPTIONAL
1485 ompt_state_t prev_state = ompt_state_undefined;
1486 ompt_thread_info_t ti;
1487 // This is the case, if called from __kmpc_critical:
1488 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1489 if (!codeptr)
1490 codeptr = OMPT_GET_RETURN_ADDRESS(0);
1491#endif
1492
1493 KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1494 __kmp_assert_valid_gtid(global_tid);
1495
1496 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1497 // Check if it is initialized.
1498 KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1499 kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1500 if (*lk == 0) {
1501 if (KMP_IS_D_LOCK(lockseq)) {
1502 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1503 KMP_GET_D_TAG(lockseq));
1504 } else {
1505 __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1506 }
1507 }
1508 // Branch to access the actual lock object and perform the set operation.
1509 // This branching is inevitable since this lock initialization does not
1510 // follow the normal dispatch path (the lock table is not used).
1511 if (KMP_EXTRACT_D_TAG(lk) != 0) {
1512 lck = (kmp_user_lock_p)lk;
1513 if (__kmp_env_consistency_check) {
1514 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1515 __kmp_map_hint_to_lock(hint));
1516 }
1517#if USE_ITT_BUILD
1518 __kmp_itt_critical_acquiring(lck);
1519#endif
1520#if OMPT_SUPPORT && OMPT_OPTIONAL
1521 if (ompt_enabled.enabled) {
1522 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1523 /* OMPT state update */
1524 prev_state = ti.state;
1525 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1526 ti.state = ompt_state_wait_critical;
1527
1528 /* OMPT event callback */
1529 if (ompt_enabled.ompt_callback_mutex_acquire) {
1530 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1531 ompt_mutex_critical, (unsigned int)hint,
1532 __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1533 codeptr);
1534 }
1535 }
1536#endif
1537#if KMP_USE_INLINED_TAS
1538 if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1539 KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1540 } else
1541#elif KMP_USE_INLINED_FUTEX
1542 if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1543 KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1544 } else
1545#endif
1546 {
1547 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1548 }
1549 } else {
1550 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1551 lck = ilk->lock;
1552 if (__kmp_env_consistency_check) {
1553 __kmp_push_sync(global_tid, ct_critical, loc, lck,
1554 __kmp_map_hint_to_lock(hint));
1555 }
1556#if USE_ITT_BUILD
1557 __kmp_itt_critical_acquiring(lck);
1558#endif
1559#if OMPT_SUPPORT && OMPT_OPTIONAL
1560 if (ompt_enabled.enabled) {
1561 ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1562 /* OMPT state update */
1563 prev_state = ti.state;
1564 ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1565 ti.state = ompt_state_wait_critical;
1566
1567 /* OMPT event callback */
1568 if (ompt_enabled.ompt_callback_mutex_acquire) {
1569 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1570 ompt_mutex_critical, (unsigned int)hint,
1571 __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1572 codeptr);
1573 }
1574 }
1575#endif
1576 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1577 }
1578 KMP_POP_PARTITIONED_TIMER();
1579
1580#if USE_ITT_BUILD
1581 __kmp_itt_critical_acquired(lck);
1582#endif /* USE_ITT_BUILD */
1583#if OMPT_SUPPORT && OMPT_OPTIONAL
1584 if (ompt_enabled.enabled) {
1585 /* OMPT state update */
1586 ti.state = prev_state;
1587 ti.wait_id = 0;
1588
1589 /* OMPT event callback */
1590 if (ompt_enabled.ompt_callback_mutex_acquired) {
1591 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1592 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1593 }
1594 }
1595#endif
1596
1597 KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1598 KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1599} // __kmpc_critical_with_hint
1600
1601#endif // KMP_USE_DYNAMIC_LOCK
1602
1612void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1613 kmp_critical_name *crit) {
1614 kmp_user_lock_p lck;
1615
1616 KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1617
1618#if KMP_USE_DYNAMIC_LOCK
1619 int locktag = KMP_EXTRACT_D_TAG(crit);
1620 if (locktag) {
1621 lck = (kmp_user_lock_p)crit;
1622 KMP_ASSERT(lck != NULL);
1623 if (__kmp_env_consistency_check) {
1624 __kmp_pop_sync(global_tid, ct_critical, loc);
1625 }
1626#if USE_ITT_BUILD
1627 __kmp_itt_critical_releasing(lck);
1628#endif
1629#if KMP_USE_INLINED_TAS
1630 if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1631 KMP_RELEASE_TAS_LOCK(lck, global_tid);
1632 } else
1633#elif KMP_USE_INLINED_FUTEX
1634 if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1635 KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1636 } else
1637#endif
1638 {
1639 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1640 }
1641 } else {
1642 kmp_indirect_lock_t *ilk =
1643 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1644 KMP_ASSERT(ilk != NULL);
1645 lck = ilk->lock;
1646 if (__kmp_env_consistency_check) {
1647 __kmp_pop_sync(global_tid, ct_critical, loc);
1648 }
1649#if USE_ITT_BUILD
1650 __kmp_itt_critical_releasing(lck);
1651#endif
1652 KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1653 }
1654
1655#else // KMP_USE_DYNAMIC_LOCK
1656
1657 if ((__kmp_user_lock_kind == lk_tas) &&
1658 (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1659 lck = (kmp_user_lock_p)crit;
1660 }
1661#if KMP_USE_FUTEX
1662 else if ((__kmp_user_lock_kind == lk_futex) &&
1663 (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1664 lck = (kmp_user_lock_p)crit;
1665 }
1666#endif
1667 else { // ticket, queuing or drdpa
1668 lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1669 }
1670
1671 KMP_ASSERT(lck != NULL);
1672
1673 if (__kmp_env_consistency_check)
1674 __kmp_pop_sync(global_tid, ct_critical, loc);
1675
1676#if USE_ITT_BUILD
1677 __kmp_itt_critical_releasing(lck);
1678#endif /* USE_ITT_BUILD */
1679 // The value of 'crit' should be usable as a critical_id of the critical
1680 // section directive.
1681 __kmp_release_user_lock_with_checks(lck, global_tid);
1682
1683#endif // KMP_USE_DYNAMIC_LOCK
1684
1685#if OMPT_SUPPORT && OMPT_OPTIONAL
1686 /* OMPT release event triggers after lock is released; place here to trigger
1687 * for all #if branches */
1688 OMPT_STORE_RETURN_ADDRESS(global_tid);
1689 if (ompt_enabled.ompt_callback_mutex_released) {
1690 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1691 ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1692 OMPT_LOAD_RETURN_ADDRESS(0));
1693 }
1694#endif
1695
1696 KMP_POP_PARTITIONED_TIMER();
1697 KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1698}
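/*
  Illustrative sketch of the lowering of a named critical construct; the
  compiler materializes one zero-initialized kmp_critical_name per critical
  name and passes its address to both calls (variable names and loc/gtid are
  assumptions, as in the earlier sketches):

      static kmp_critical_name crit_name_lock = {0};

      void lowered(void) {
        kmp_int32 gtid = __kmpc_global_thread_num(&loc);
        __kmpc_critical(&loc, gtid, &crit_name_lock);
        // ... structured block of "#pragma omp critical(name)" ...
        __kmpc_end_critical(&loc, gtid, &crit_name_lock);
      }

  The first thread to enter initializes (or allocates) the underlying lock in
  place inside the kmp_critical_name storage, as the code above shows.
*/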
1699
1709kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1710 int status;
1711 KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1712 __kmp_assert_valid_gtid(global_tid);
1713
1714 if (!TCR_4(__kmp_init_parallel))
1715 __kmp_parallel_initialize();
1716
1717 __kmp_resume_if_soft_paused();
1718
1719 if (__kmp_env_consistency_check)
1720 __kmp_check_barrier(global_tid, ct_barrier, loc);
1721
1722#if OMPT_SUPPORT
1723 ompt_frame_t *ompt_frame;
1724 if (ompt_enabled.enabled) {
1725 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1726 if (ompt_frame->enter_frame.ptr == NULL)
1727 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1728 }
1729 OMPT_STORE_RETURN_ADDRESS(global_tid);
1730#endif
1731#if USE_ITT_NOTIFY
1732 __kmp_threads[global_tid]->th.th_ident = loc;
1733#endif
1734 status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1735#if OMPT_SUPPORT && OMPT_OPTIONAL
1736 if (ompt_enabled.enabled) {
1737 ompt_frame->enter_frame = ompt_data_none;
1738 }
1739#endif
1740
1741 return (status != 0) ? 0 : 1;
1742}
1743
1753void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1754 KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1755 __kmp_assert_valid_gtid(global_tid);
1756 __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1757}
1758
1769kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1770 kmp_int32 ret;
1771 KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1772 __kmp_assert_valid_gtid(global_tid);
1773
1774 if (!TCR_4(__kmp_init_parallel))
1775 __kmp_parallel_initialize();
1776
1777 __kmp_resume_if_soft_paused();
1778
1779 if (__kmp_env_consistency_check) {
1780 if (loc == 0) {
1781 KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1782 }
1783 __kmp_check_barrier(global_tid, ct_barrier, loc);
1784 }
1785
1786#if OMPT_SUPPORT
1787 ompt_frame_t *ompt_frame;
1788 if (ompt_enabled.enabled) {
1789 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1790 if (ompt_frame->enter_frame.ptr == NULL)
1791 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1792 }
1793 OMPT_STORE_RETURN_ADDRESS(global_tid);
1794#endif
1795#if USE_ITT_NOTIFY
1796 __kmp_threads[global_tid]->th.th_ident = loc;
1797#endif
1798 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1799#if OMPT_SUPPORT && OMPT_OPTIONAL
1800 if (ompt_enabled.enabled) {
1801 ompt_frame->enter_frame = ompt_data_none;
1802 }
1803#endif
1804
1805 ret = __kmpc_master(loc, global_tid);
1806
1807 if (__kmp_env_consistency_check) {
1808 /* there's no __kmpc_end_master called; so the (stats) */
1809 /* actions of __kmpc_end_master are done here */
1810 if (ret) {
1811 /* only one thread should do the pop since only */
1812 /* one did the push (see __kmpc_master()) */
1813 __kmp_pop_sync(global_tid, ct_master, loc);
1814 }
1815 }
1816
1817 return (ret);
1818}
1819
1820/* The BARRIER for a SINGLE process section is always explicit */
1832kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1833 __kmp_assert_valid_gtid(global_tid);
1834 kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1835
1836 if (rc) {
1837 // We are going to execute the single statement, so we should count it.
1838 KMP_COUNT_BLOCK(OMP_SINGLE);
1839 KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1840 }
1841
1842#if OMPT_SUPPORT && OMPT_OPTIONAL
1843 kmp_info_t *this_thr = __kmp_threads[global_tid];
1844 kmp_team_t *team = this_thr->th.th_team;
1845 int tid = __kmp_tid_from_gtid(global_tid);
1846
1847 if (ompt_enabled.enabled) {
1848 if (rc) {
1849 if (ompt_enabled.ompt_callback_work) {
1850 ompt_callbacks.ompt_callback(ompt_callback_work)(
1851 ompt_work_single_executor, ompt_scope_begin,
1852 &(team->t.ompt_team_info.parallel_data),
1853 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1854 1, OMPT_GET_RETURN_ADDRESS(0));
1855 }
1856 } else {
1857 if (ompt_enabled.ompt_callback_work) {
1858 ompt_callbacks.ompt_callback(ompt_callback_work)(
1859 ompt_work_single_other, ompt_scope_begin,
1860 &(team->t.ompt_team_info.parallel_data),
1861 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1862 1, OMPT_GET_RETURN_ADDRESS(0));
1863 ompt_callbacks.ompt_callback(ompt_callback_work)(
1864 ompt_work_single_other, ompt_scope_end,
1865 &(team->t.ompt_team_info.parallel_data),
1866 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1867 1, OMPT_GET_RETURN_ADDRESS(0));
1868 }
1869 }
1870 }
1871#endif
1872
1873 return rc;
1874}
1875
1885void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1886 __kmp_assert_valid_gtid(global_tid);
1887 __kmp_exit_single(global_tid);
1888 KMP_POP_PARTITIONED_TIMER();
1889
1890#if OMPT_SUPPORT && OMPT_OPTIONAL
1891 kmp_info_t *this_thr = __kmp_threads[global_tid];
1892 kmp_team_t *team = this_thr->th.th_team;
1893 int tid = __kmp_tid_from_gtid(global_tid);
1894
1895 if (ompt_enabled.ompt_callback_work) {
1896 ompt_callbacks.ompt_callback(ompt_callback_work)(
1897 ompt_work_single_executor, ompt_scope_end,
1898 &(team->t.ompt_team_info.parallel_data),
1899 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1900 OMPT_GET_RETURN_ADDRESS(0));
1901 }
1902#endif
1903}
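/*
  Illustrative sketch of the lowering of "#pragma omp single" without a
  nowait clause (loc and gtid as in the earlier sketches):

      if (__kmpc_single(&loc, gtid)) {
        // ... structured block, executed by exactly one thread ...
        __kmpc_end_single(&loc, gtid);
      }
      __kmpc_barrier(&loc, gtid); // omitted when nowait is present

  Only the thread that won the single region calls __kmpc_end_single(); the
  others skip straight to the closing barrier.
*/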
1904
1912void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1913 KMP_POP_PARTITIONED_TIMER();
1914 KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1915
1916#if OMPT_SUPPORT && OMPT_OPTIONAL
1917 if (ompt_enabled.ompt_callback_work) {
1918 ompt_work_t ompt_work_type = ompt_work_loop;
1919 ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1920 ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1921 // Determine workshare type
1922 if (loc != NULL) {
1923 if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1924 ompt_work_type = ompt_work_loop;
1925 } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1926 ompt_work_type = ompt_work_sections;
1927 } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1928 ompt_work_type = ompt_work_distribute;
1929 } else {
1930 // use default set above.
1931 // a warning about this case is provided in __kmpc_for_static_init
1932 }
1933 KMP_DEBUG_ASSERT(ompt_work_type);
1934 }
1935 ompt_callbacks.ompt_callback(ompt_callback_work)(
1936 ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1937 &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1938 }
1939#endif
1940 if (__kmp_env_consistency_check)
1941 __kmp_pop_workshare(global_tid, ct_pdo, loc);
1942}
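/*
  Illustrative sketch of the bracket around a statically scheduled loop. The
  matching init entry point lives in kmp_sched.cpp; the argument list shown
  here is an assumption for the example:

      kmp_int32 last = 0, lb = 0, ub = n - 1, stride = 1;
      __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub,
                               &stride, 1, 0); // incr = 1, chunk unused
      for (kmp_int32 i = lb; i <= ub; ++i)
        body(i);
      __kmpc_for_static_fini(&loc, gtid); // the call documented above
      __kmpc_barrier(&loc, gtid);         // unless nowait
*/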
1943
1944// User routines which take C-style arguments (call by value)
1945// different from the Fortran equivalent routines
1946
1947void ompc_set_num_threads(int arg) {
1948 // !!!!! TODO: check the per-task binding
1949 __kmp_set_num_threads(arg, __kmp_entry_gtid());
1950}
1951
1952void ompc_set_dynamic(int flag) {
1953 kmp_info_t *thread;
1954
1955 /* For the thread-private implementation of the internal controls */
1956 thread = __kmp_entry_thread();
1957
1958 __kmp_save_internal_controls(thread);
1959
1960 set__dynamic(thread, flag ? true : false);
1961}
1962
1963void ompc_set_nested(int flag) {
1964 kmp_info_t *thread;
1965
1966 /* For the thread-private internal controls implementation */
1967 thread = __kmp_entry_thread();
1968
1969 __kmp_save_internal_controls(thread);
1970
1971 set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1972}
1973
1974void ompc_set_max_active_levels(int max_active_levels) {
1975 /* TO DO */
1976 /* we want per-task implementation of this internal control */
1977
1978 /* For the per-thread internal controls implementation */
1979 __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1980}
1981
1982void ompc_set_schedule(omp_sched_t kind, int modifier) {
1983 // !!!!! TODO: check the per-task binding
1984 __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1985}
1986
1987int ompc_get_ancestor_thread_num(int level) {
1988 return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1989}
1990
1991int ompc_get_team_size(int level) {
1992 return __kmp_get_team_size(__kmp_entry_gtid(), level);
1993}
1994
1995/* OpenMP 5.0 Affinity Format API */
1996void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
1997 if (!__kmp_init_serial) {
1998 __kmp_serial_initialize();
1999 }
2000 __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2001 format, KMP_STRLEN(format) + 1);
2002}
2003
2004size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2005 size_t format_size;
2006 if (!__kmp_init_serial) {
2007 __kmp_serial_initialize();
2008 }
2009 format_size = KMP_STRLEN(__kmp_affinity_format);
2010 if (buffer && size) {
2011 __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2012 format_size + 1);
2013 }
2014 return format_size;
2015}
2016
2017void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2018 int gtid;
2019 if (!TCR_4(__kmp_init_middle)) {
2020 __kmp_middle_initialize();
2021 }
2022 __kmp_assign_root_init_mask();
2023 gtid = __kmp_get_gtid();
2024 __kmp_aux_display_affinity(gtid, format);
2025}
2026
2027size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2028 char const *format) {
2029 int gtid;
2030 size_t num_required;
2031 kmp_str_buf_t capture_buf;
2032 if (!TCR_4(__kmp_init_middle)) {
2033 __kmp_middle_initialize();
2034 }
2035 __kmp_assign_root_init_mask();
2036 gtid = __kmp_get_gtid();
2037 __kmp_str_buf_init(&capture_buf);
2038 num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2039 if (buffer && buf_size) {
2040 __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2041 capture_buf.used + 1);
2042 }
2043 __kmp_str_buf_free(&capture_buf);
2044 return num_required;
2045}
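// Illustration (not part of the runtime): these ompc_* entry points back the
// OpenMP 5.0 affinity-format API. The sizing pattern below relies on the
// "return the required length" behavior implemented above; the format string
// and the buffer size are arbitrary examples:
//
//   omp_set_affinity_format("host %H pid %P thread %n affinity {%A}");
//   #pragma omp parallel
//   {
//     char buf[256];
//     size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL);
//     if (needed >= sizeof(buf)) {
//       /* truncated: a buffer of needed + 1 bytes would hold the full string */
//     }
//     omp_display_affinity(NULL); // NULL format: use the current default
//   }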
2046
2047void kmpc_set_stacksize(int arg) {
2048 // __kmp_aux_set_stacksize initializes the library if needed
2049 __kmp_aux_set_stacksize(arg);
2050}
2051
2052void kmpc_set_stacksize_s(size_t arg) {
2053 // __kmp_aux_set_stacksize initializes the library if needed
2054 __kmp_aux_set_stacksize(arg);
2055}
2056
2057void kmpc_set_blocktime(int arg) {
2058 int gtid, tid;
2059 kmp_info_t *thread;
2060
2061 gtid = __kmp_entry_gtid();
2062 tid = __kmp_tid_from_gtid(gtid);
2063 thread = __kmp_thread_from_gtid(gtid);
2064
2065 __kmp_aux_set_blocktime(arg, thread, tid);
2066}
2067
2068void kmpc_set_library(int arg) {
2069 // __kmp_user_set_library initializes the library if needed
2070 __kmp_user_set_library((enum library_type)arg);
2071}
2072
2073void kmpc_set_defaults(char const *str) {
2074 // __kmp_aux_set_defaults initializes the library if needed
2075 __kmp_aux_set_defaults(str, KMP_STRLEN(str));
2076}
2077
2078void kmpc_set_disp_num_buffers(int arg) {
2079 // ignore after initialization because some teams have already
2080 // allocated dispatch buffers
2081 if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2082 arg <= KMP_MAX_DISP_NUM_BUFF) {
2083 __kmp_dispatch_num_buffers = arg;
2084 }
2085}
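// Illustration (not part of the runtime): since the request is ignored once
// the library is initialized, the number of dispatch buffers has to be set
// before the first OpenMP construct, either via the KMP_DISP_NUM_BUFFERS
// environment variable or, assuming the kmp_set_disp_num_buffers() extension
// declared in libomp's omp.h, right at program start:
//
//   kmp_set_disp_num_buffers(16);  // must run before any OpenMP construct
//   #pragma omp parallel for schedule(dynamic)
//   for (int i = 0; i < n; ++i)    // n and work() are placeholders
//     work(i);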
2086
2087int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2088#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2089 return -1;
2090#else
2091 if (!TCR_4(__kmp_init_middle)) {
2092 __kmp_middle_initialize();
2093 }
2094 __kmp_assign_root_init_mask();
2095 return __kmp_aux_set_affinity_mask_proc(proc, mask);
2096#endif
2097}
2098
2099int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2100#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2101 return -1;
2102#else
2103 if (!TCR_4(__kmp_init_middle)) {
2104 __kmp_middle_initialize();
2105 }
2106 __kmp_assign_root_init_mask();
2107 return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2108#endif
2109}
2110
2111int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2112#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2113 return -1;
2114#else
2115 if (!TCR_4(__kmp_init_middle)) {
2116 __kmp_middle_initialize();
2117 }
2118 __kmp_assign_root_init_mask();
2119 return __kmp_aux_get_affinity_mask_proc(proc, mask);
2120#endif
2121}
2122
2123/* -------------------------------------------------------------------------- */
2168void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2169 void *cpy_data, void (*cpy_func)(void *, void *),
2170 kmp_int32 didit) {
2171 void **data_ptr;
2172 KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2173 __kmp_assert_valid_gtid(gtid);
2174
2175 KMP_MB();
2176
2177 data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2178
2179 if (__kmp_env_consistency_check) {
2180 if (loc == 0) {
2181 KMP_WARNING(ConstructIdentInvalid);
2182 }
2183 }
2184
2185 // ToDo: Optimize the following two barriers into some kind of split barrier
2186
2187 if (didit)
2188 *data_ptr = cpy_data;
2189
2190#if OMPT_SUPPORT
2191 ompt_frame_t *ompt_frame;
2192 if (ompt_enabled.enabled) {
2193 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2194 if (ompt_frame->enter_frame.ptr == NULL)
2195 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2196 }
2197 OMPT_STORE_RETURN_ADDRESS(gtid);
2198#endif
2199/* This barrier is not a barrier region boundary */
2200#if USE_ITT_NOTIFY
2201 __kmp_threads[gtid]->th.th_ident = loc;
2202#endif
2203 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2204
2205 if (!didit)
2206 (*cpy_func)(cpy_data, *data_ptr);
2207
2208 // Consider next barrier a user-visible barrier for barrier region boundaries
2209 // Nesting checks are already handled by the single construct checks
2210 {
2211#if OMPT_SUPPORT
2212 OMPT_STORE_RETURN_ADDRESS(gtid);
2213#endif
2214#if USE_ITT_NOTIFY
2215 __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed
2216 // (e.g. tasks can overwrite the location)
2217#endif
2218 __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2219#if OMPT_SUPPORT && OMPT_OPTIONAL
2220 if (ompt_enabled.enabled) {
2221 ompt_frame->enter_frame = ompt_data_none;
2222 }
2223#endif
2224 }
2225}
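// Illustration (a sketch, not actual compiler output): how a compiler is
// expected to lower "#pragma omp single copyprivate(x)" onto
// __kmpc_copyprivate(). The helper __copy_x, the variable x and compute()
// are placeholders; note that the copy routine receives (this thread's data,
// broadcast data), matching the (*cpy_func)(cpy_data, *data_ptr) call above.
//
//   static void __copy_x(void *dst, void *src) { *(int *)dst = *(int *)src; }
//
//   int x;
//   kmp_int32 didit = __kmpc_single(loc, gtid); // nonzero only in one thread
//   if (didit) {
//     x = compute();                            // body of the single region
//     __kmpc_end_single(loc, gtid);
//   }
//   __kmpc_copyprivate(loc, gtid, sizeof(x), &x, __copy_x, didit);
//   // afterwards every thread's x holds the value produced by the single thread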
2226
2227/* -------------------------------------------------------------------------- */
2228
2229#define INIT_LOCK __kmp_init_user_lock_with_checks
2230#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2231#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2232#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2233#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2234#define ACQUIRE_NESTED_LOCK_TIMED \
2235 __kmp_acquire_nested_user_lock_with_checks_timed
2236#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2237#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2238#define TEST_LOCK __kmp_test_user_lock_with_checks
2239#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2240#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2241#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2242
2243// TODO: Make check abort messages use location info & pass it into
2244// with_checks routines
2245
2246#if KMP_USE_DYNAMIC_LOCK
2247
2248// internal lock initializer
2249static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2250 kmp_dyna_lockseq_t seq) {
2251 if (KMP_IS_D_LOCK(seq)) {
2252 KMP_INIT_D_LOCK(lock, seq);
2253#if USE_ITT_BUILD
2254 __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2255#endif
2256 } else {
2257 KMP_INIT_I_LOCK(lock, seq);
2258#if USE_ITT_BUILD
2259 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2260 __kmp_itt_lock_creating(ilk->lock, loc);
2261#endif
2262 }
2263}
2264
2265// internal nest lock initializer
2266static __forceinline void
2267__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2268 kmp_dyna_lockseq_t seq) {
2269#if KMP_USE_TSX
2270 // Don't have nested lock implementation for speculative locks
2271 if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2272 seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2273 seq = __kmp_user_lock_seq;
2274#endif
2275 switch (seq) {
2276 case lockseq_tas:
2277 seq = lockseq_nested_tas;
2278 break;
2279#if KMP_USE_FUTEX
2280 case lockseq_futex:
2281 seq = lockseq_nested_futex;
2282 break;
2283#endif
2284 case lockseq_ticket:
2285 seq = lockseq_nested_ticket;
2286 break;
2287 case lockseq_queuing:
2288 seq = lockseq_nested_queuing;
2289 break;
2290 case lockseq_drdpa:
2291 seq = lockseq_nested_drdpa;
2292 break;
2293 default:
2294 seq = lockseq_nested_queuing;
2295 }
2296 KMP_INIT_I_LOCK(lock, seq);
2297#if USE_ITT_BUILD
2298 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2299 __kmp_itt_lock_creating(ilk->lock, loc);
2300#endif
2301}
2302
2303/* initialize the lock with a hint */
2304void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2305 uintptr_t hint) {
2306 KMP_DEBUG_ASSERT(__kmp_init_serial);
2307 if (__kmp_env_consistency_check && user_lock == NULL) {
2308 KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2309 }
2310
2311 __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2312
2313#if OMPT_SUPPORT && OMPT_OPTIONAL
2314 // This is the case, if called from omp_init_lock_with_hint:
2315 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2316 if (!codeptr)
2317 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2318 if (ompt_enabled.ompt_callback_lock_init) {
2319 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2320 ompt_mutex_lock, (omp_lock_hint_t)hint,
2321 __ompt_get_mutex_impl_type(user_lock),
2322 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2323 }
2324#endif
2325}
2326
2327/* initialize the lock with a hint */
2328void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2329 void **user_lock, uintptr_t hint) {
2330 KMP_DEBUG_ASSERT(__kmp_init_serial);
2331 if (__kmp_env_consistency_check && user_lock == NULL) {
2332 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2333 }
2334
2335 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2336
2337#if OMPT_SUPPORT && OMPT_OPTIONAL
2338 // This is the case, if called from omp_init_lock_with_hint:
2339 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2340 if (!codeptr)
2341 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2342 if (ompt_enabled.ompt_callback_lock_init) {
2343 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2344 ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2345 __ompt_get_mutex_impl_type(user_lock),
2346 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2347 }
2348#endif
2349}
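// Illustration (not part of the runtime): user code reaches the two entry
// points above through omp_init_lock_with_hint()/omp_init_nest_lock_with_hint()
// (OpenMP 4.5). A hint is only advisory; as the nested initializer above
// shows, a speculative hint for a nest lock falls back to a default sequence:
//
//   omp_lock_t l;
//   omp_init_lock_with_hint(&l, omp_lock_hint_speculative);
//   omp_nest_lock_t nl;
//   omp_init_nest_lock_with_hint(&nl, omp_lock_hint_contended);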
2350
2351#endif // KMP_USE_DYNAMIC_LOCK
2352
2353/* initialize the lock */
2354void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2355#if KMP_USE_DYNAMIC_LOCK
2356
2357 KMP_DEBUG_ASSERT(__kmp_init_serial);
2358 if (__kmp_env_consistency_check && user_lock == NULL) {
2359 KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2360 }
2361 __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2362
2363#if OMPT_SUPPORT && OMPT_OPTIONAL
2364 // This is the case, if called from omp_init_lock_with_hint:
2365 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2366 if (!codeptr)
2367 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2368 if (ompt_enabled.ompt_callback_lock_init) {
2369 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2370 ompt_mutex_lock, omp_lock_hint_none,
2371 __ompt_get_mutex_impl_type(user_lock),
2372 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2373 }
2374#endif
2375
2376#else // KMP_USE_DYNAMIC_LOCK
2377
2378 static char const *const func = "omp_init_lock";
2379 kmp_user_lock_p lck;
2380 KMP_DEBUG_ASSERT(__kmp_init_serial);
2381
2382 if (__kmp_env_consistency_check) {
2383 if (user_lock == NULL) {
2384 KMP_FATAL(LockIsUninitialized, func);
2385 }
2386 }
2387
2388 KMP_CHECK_USER_LOCK_INIT();
2389
2390 if ((__kmp_user_lock_kind == lk_tas) &&
2391 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2392 lck = (kmp_user_lock_p)user_lock;
2393 }
2394#if KMP_USE_FUTEX
2395 else if ((__kmp_user_lock_kind == lk_futex) &&
2396 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2397 lck = (kmp_user_lock_p)user_lock;
2398 }
2399#endif
2400 else {
2401 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2402 }
2403 INIT_LOCK(lck);
2404 __kmp_set_user_lock_location(lck, loc);
2405
2406#if OMPT_SUPPORT && OMPT_OPTIONAL
2407 // This is the case, if called from omp_init_lock_with_hint:
2408 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2409 if (!codeptr)
2410 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2411 if (ompt_enabled.ompt_callback_lock_init) {
2412 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2413 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2414 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2415 }
2416#endif
2417
2418#if USE_ITT_BUILD
2419 __kmp_itt_lock_creating(lck);
2420#endif /* USE_ITT_BUILD */
2421
2422#endif // KMP_USE_DYNAMIC_LOCK
2423} // __kmpc_init_lock
2424
2425/* initialize the lock */
2426void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2427#if KMP_USE_DYNAMIC_LOCK
2428
2429 KMP_DEBUG_ASSERT(__kmp_init_serial);
2430 if (__kmp_env_consistency_check && user_lock == NULL) {
2431 KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2432 }
2433 __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2434
2435#if OMPT_SUPPORT && OMPT_OPTIONAL
2436 // This is the case, if called from omp_init_lock_with_hint:
2437 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2438 if (!codeptr)
2439 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2440 if (ompt_enabled.ompt_callback_lock_init) {
2441 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2442 ompt_mutex_nest_lock, omp_lock_hint_none,
2443 __ompt_get_mutex_impl_type(user_lock),
2444 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2445 }
2446#endif
2447
2448#else // KMP_USE_DYNAMIC_LOCK
2449
2450 static char const *const func = "omp_init_nest_lock";
2451 kmp_user_lock_p lck;
2452 KMP_DEBUG_ASSERT(__kmp_init_serial);
2453
2454 if (__kmp_env_consistency_check) {
2455 if (user_lock == NULL) {
2456 KMP_FATAL(LockIsUninitialized, func);
2457 }
2458 }
2459
2460 KMP_CHECK_USER_LOCK_INIT();
2461
2462 if ((__kmp_user_lock_kind == lk_tas) &&
2463 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2464 OMP_NEST_LOCK_T_SIZE)) {
2465 lck = (kmp_user_lock_p)user_lock;
2466 }
2467#if KMP_USE_FUTEX
2468 else if ((__kmp_user_lock_kind == lk_futex) &&
2469 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2470 OMP_NEST_LOCK_T_SIZE)) {
2471 lck = (kmp_user_lock_p)user_lock;
2472 }
2473#endif
2474 else {
2475 lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2476 }
2477
2478 INIT_NESTED_LOCK(lck);
2479 __kmp_set_user_lock_location(lck, loc);
2480
2481#if OMPT_SUPPORT && OMPT_OPTIONAL
2482 // This is the case, if called from omp_init_lock_with_hint:
2483 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2484 if (!codeptr)
2485 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2486 if (ompt_enabled.ompt_callback_lock_init) {
2487 ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2488 ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2489 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2490 }
2491#endif
2492
2493#if USE_ITT_BUILD
2494 __kmp_itt_lock_creating(lck);
2495#endif /* USE_ITT_BUILD */
2496
2497#endif // KMP_USE_DYNAMIC_LOCK
2498} // __kmpc_init_nest_lock
2499
2500void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2501#if KMP_USE_DYNAMIC_LOCK
2502
2503#if USE_ITT_BUILD
2504 kmp_user_lock_p lck;
2505 if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2506 lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2507 } else {
2508 lck = (kmp_user_lock_p)user_lock;
2509 }
2510 __kmp_itt_lock_destroyed(lck);
2511#endif
2512#if OMPT_SUPPORT && OMPT_OPTIONAL
2513 // This is the case, if called from omp_init_lock_with_hint:
2514 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2515 if (!codeptr)
2516 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2517 if (ompt_enabled.ompt_callback_lock_destroy) {
2518 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2519 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2520 }
2521#endif
2522 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2523#else
2524 kmp_user_lock_p lck;
2525
2526 if ((__kmp_user_lock_kind == lk_tas) &&
2527 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2528 lck = (kmp_user_lock_p)user_lock;
2529 }
2530#if KMP_USE_FUTEX
2531 else if ((__kmp_user_lock_kind == lk_futex) &&
2532 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2533 lck = (kmp_user_lock_p)user_lock;
2534 }
2535#endif
2536 else {
2537 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2538 }
2539
2540#if OMPT_SUPPORT && OMPT_OPTIONAL
2541 // This is the case, if called from omp_init_lock_with_hint:
2542 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2543 if (!codeptr)
2544 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2545 if (ompt_enabled.ompt_callback_lock_destroy) {
2546 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2547 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2548 }
2549#endif
2550
2551#if USE_ITT_BUILD
2552 __kmp_itt_lock_destroyed(lck);
2553#endif /* USE_ITT_BUILD */
2554 DESTROY_LOCK(lck);
2555
2556 if ((__kmp_user_lock_kind == lk_tas) &&
2557 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2558 ;
2559 }
2560#if KMP_USE_FUTEX
2561 else if ((__kmp_user_lock_kind == lk_futex) &&
2562 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2563 ;
2564 }
2565#endif
2566 else {
2567 __kmp_user_lock_free(user_lock, gtid, lck);
2568 }
2569#endif // KMP_USE_DYNAMIC_LOCK
2570} // __kmpc_destroy_lock
2571
2572/* destroy the lock */
2573void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2574#if KMP_USE_DYNAMIC_LOCK
2575
2576#if USE_ITT_BUILD
2577 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2578 __kmp_itt_lock_destroyed(ilk->lock);
2579#endif
2580#if OMPT_SUPPORT && OMPT_OPTIONAL
2581 // This is the case, if called from omp_init_lock_with_hint:
2582 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2583 if (!codeptr)
2584 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2585 if (ompt_enabled.ompt_callback_lock_destroy) {
2586 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2587 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2588 }
2589#endif
2590 KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2591
2592#else // KMP_USE_DYNAMIC_LOCK
2593
2594 kmp_user_lock_p lck;
2595
2596 if ((__kmp_user_lock_kind == lk_tas) &&
2597 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2598 OMP_NEST_LOCK_T_SIZE)) {
2599 lck = (kmp_user_lock_p)user_lock;
2600 }
2601#if KMP_USE_FUTEX
2602 else if ((__kmp_user_lock_kind == lk_futex) &&
2603 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2604 OMP_NEST_LOCK_T_SIZE)) {
2605 lck = (kmp_user_lock_p)user_lock;
2606 }
2607#endif
2608 else {
2609 lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2610 }
2611
2612#if OMPT_SUPPORT && OMPT_OPTIONAL
2613 // This is the case, if called from omp_init_lock_with_hint:
2614 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2615 if (!codeptr)
2616 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2617 if (ompt_enabled.ompt_callback_lock_destroy) {
2618 ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2619 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2620 }
2621#endif
2622
2623#if USE_ITT_BUILD
2624 __kmp_itt_lock_destroyed(lck);
2625#endif /* USE_ITT_BUILD */
2626
2627 DESTROY_NESTED_LOCK(lck);
2628
2629 if ((__kmp_user_lock_kind == lk_tas) &&
2630 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2631 OMP_NEST_LOCK_T_SIZE)) {
2632 ;
2633 }
2634#if KMP_USE_FUTEX
2635 else if ((__kmp_user_lock_kind == lk_futex) &&
2636 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2637 OMP_NEST_LOCK_T_SIZE)) {
2638 ;
2639 }
2640#endif
2641 else {
2642 __kmp_user_lock_free(user_lock, gtid, lck);
2643 }
2644#endif // KMP_USE_DYNAMIC_LOCK
2645} // __kmpc_destroy_nest_lock
2646
2647void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2648 KMP_COUNT_BLOCK(OMP_set_lock);
2649#if KMP_USE_DYNAMIC_LOCK
2650 int tag = KMP_EXTRACT_D_TAG(user_lock);
2651#if USE_ITT_BUILD
2652 __kmp_itt_lock_acquiring(
2653 (kmp_user_lock_p)
2654 user_lock); // itt function will get to the right lock object.
2655#endif
2656#if OMPT_SUPPORT && OMPT_OPTIONAL
2657 // This is the case, if called from omp_init_lock_with_hint:
2658 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2659 if (!codeptr)
2660 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2661 if (ompt_enabled.ompt_callback_mutex_acquire) {
2662 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2663 ompt_mutex_lock, omp_lock_hint_none,
2664 __ompt_get_mutex_impl_type(user_lock),
2665 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2666 }
2667#endif
2668#if KMP_USE_INLINED_TAS
2669 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2670 KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2671 } else
2672#elif KMP_USE_INLINED_FUTEX
2673 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2674 KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2675 } else
2676#endif
2677 {
2678 __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2679 }
2680#if USE_ITT_BUILD
2681 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2682#endif
2683#if OMPT_SUPPORT && OMPT_OPTIONAL
2684 if (ompt_enabled.ompt_callback_mutex_acquired) {
2685 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2686 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2687 }
2688#endif
2689
2690#else // KMP_USE_DYNAMIC_LOCK
2691
2692 kmp_user_lock_p lck;
2693
2694 if ((__kmp_user_lock_kind == lk_tas) &&
2695 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2696 lck = (kmp_user_lock_p)user_lock;
2697 }
2698#if KMP_USE_FUTEX
2699 else if ((__kmp_user_lock_kind == lk_futex) &&
2700 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2701 lck = (kmp_user_lock_p)user_lock;
2702 }
2703#endif
2704 else {
2705 lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2706 }
2707
2708#if USE_ITT_BUILD
2709 __kmp_itt_lock_acquiring(lck);
2710#endif /* USE_ITT_BUILD */
2711#if OMPT_SUPPORT && OMPT_OPTIONAL
2712 // This is the case, if called from omp_init_lock_with_hint:
2713 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2714 if (!codeptr)
2715 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2716 if (ompt_enabled.ompt_callback_mutex_acquire) {
2717 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2718 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2719 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2720 }
2721#endif
2722
2723 ACQUIRE_LOCK(lck, gtid);
2724
2725#if USE_ITT_BUILD
2726 __kmp_itt_lock_acquired(lck);
2727#endif /* USE_ITT_BUILD */
2728
2729#if OMPT_SUPPORT && OMPT_OPTIONAL
2730 if (ompt_enabled.ompt_callback_mutex_acquired) {
2731 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2732 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2733 }
2734#endif
2735
2736#endif // KMP_USE_DYNAMIC_LOCK
2737}
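// Illustration (not part of the runtime): the omp_*_lock() wrappers typically
// forward to the __kmpc_* lock entry points in this file, so a plain OpenMP
// program such as the following ends up in __kmpc_init_lock/__kmpc_set_lock/
// __kmpc_unset_lock/__kmpc_destroy_lock (critical_work() is a placeholder):
//
//   omp_lock_t l;
//   omp_init_lock(&l);
//   #pragma omp parallel
//   {
//     omp_set_lock(&l);   // blocks until the lock is acquired
//     critical_work();
//     omp_unset_lock(&l);
//   }
//   omp_destroy_lock(&l);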
2738
2739void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2740#if KMP_USE_DYNAMIC_LOCK
2741
2742#if USE_ITT_BUILD
2743 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2744#endif
2745#if OMPT_SUPPORT && OMPT_OPTIONAL
2746 // This is the case, if called from omp_init_lock_with_hint:
2747 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2748 if (!codeptr)
2749 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2750 if (ompt_enabled.enabled) {
2751 if (ompt_enabled.ompt_callback_mutex_acquire) {
2752 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2753 ompt_mutex_nest_lock, omp_lock_hint_none,
2754 __ompt_get_mutex_impl_type(user_lock),
2755 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2756 }
2757 }
2758#endif
2759 int acquire_status =
2760 KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2761 (void)acquire_status;
2762#if USE_ITT_BUILD
2763 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2764#endif
2765
2766#if OMPT_SUPPORT && OMPT_OPTIONAL
2767 if (ompt_enabled.enabled) {
2768 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2769 if (ompt_enabled.ompt_callback_mutex_acquired) {
2770 // lock_first
2771 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2772 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2773 codeptr);
2774 }
2775 } else {
2776 if (ompt_enabled.ompt_callback_nest_lock) {
2777 // lock_next
2778 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2779 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2780 }
2781 }
2782 }
2783#endif
2784
2785#else // KMP_USE_DYNAMIC_LOCK
2786 int acquire_status;
2787 kmp_user_lock_p lck;
2788
2789 if ((__kmp_user_lock_kind == lk_tas) &&
2790 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2791 OMP_NEST_LOCK_T_SIZE)) {
2792 lck = (kmp_user_lock_p)user_lock;
2793 }
2794#if KMP_USE_FUTEX
2795 else if ((__kmp_user_lock_kind == lk_futex) &&
2796 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2797 OMP_NEST_LOCK_T_SIZE)) {
2798 lck = (kmp_user_lock_p)user_lock;
2799 }
2800#endif
2801 else {
2802 lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2803 }
2804
2805#if USE_ITT_BUILD
2806 __kmp_itt_lock_acquiring(lck);
2807#endif /* USE_ITT_BUILD */
2808#if OMPT_SUPPORT && OMPT_OPTIONAL
2809 // This is the case, if called from omp_init_lock_with_hint:
2810 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2811 if (!codeptr)
2812 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2813 if (ompt_enabled.enabled) {
2814 if (ompt_enabled.ompt_callback_mutex_acquire) {
2815 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2816 ompt_mutex_nest_lock, omp_lock_hint_none,
2817 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2818 codeptr);
2819 }
2820 }
2821#endif
2822
2823 ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2824
2825#if USE_ITT_BUILD
2826 __kmp_itt_lock_acquired(lck);
2827#endif /* USE_ITT_BUILD */
2828
2829#if OMPT_SUPPORT && OMPT_OPTIONAL
2830 if (ompt_enabled.enabled) {
2831 if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2832 if (ompt_enabled.ompt_callback_mutex_acquired) {
2833 // lock_first
2834 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2835 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2836 }
2837 } else {
2838 if (ompt_enabled.ompt_callback_nest_lock) {
2839 // lock_next
2840 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2841 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2842 }
2843 }
2844 }
2845#endif
2846
2847#endif // KMP_USE_DYNAMIC_LOCK
2848}
2849
2850void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2851#if KMP_USE_DYNAMIC_LOCK
2852
2853 int tag = KMP_EXTRACT_D_TAG(user_lock);
2854#if USE_ITT_BUILD
2855 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2856#endif
2857#if KMP_USE_INLINED_TAS
2858 if (tag == locktag_tas && !__kmp_env_consistency_check) {
2859 KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2860 } else
2861#elif KMP_USE_INLINED_FUTEX
2862 if (tag == locktag_futex && !__kmp_env_consistency_check) {
2863 KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2864 } else
2865#endif
2866 {
2867 __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2868 }
2869
2870#if OMPT_SUPPORT && OMPT_OPTIONAL
2871 // This is the case, if called from omp_init_lock_with_hint:
2872 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2873 if (!codeptr)
2874 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2875 if (ompt_enabled.ompt_callback_mutex_released) {
2876 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2877 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2878 }
2879#endif
2880
2881#else // KMP_USE_DYNAMIC_LOCK
2882
2883 kmp_user_lock_p lck;
2884
2885 /* Can't use serial interval since not block structured */
2886 /* release the lock */
2887
2888 if ((__kmp_user_lock_kind == lk_tas) &&
2889 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2890#if KMP_OS_LINUX && \
2891 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2892// "fast" path implemented to fix customer performance issue
2893#if USE_ITT_BUILD
2894 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2895#endif /* USE_ITT_BUILD */
2896 TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2897 KMP_MB();
2898
2899#if OMPT_SUPPORT && OMPT_OPTIONAL
2900 // This is the case, if called from omp_init_lock_with_hint:
2901 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2902 if (!codeptr)
2903 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2904 if (ompt_enabled.ompt_callback_mutex_released) {
2905 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2906 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2907 }
2908#endif
2909
2910 return;
2911#else
2912 lck = (kmp_user_lock_p)user_lock;
2913#endif
2914 }
2915#if KMP_USE_FUTEX
2916 else if ((__kmp_user_lock_kind == lk_futex) &&
2917 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2918 lck = (kmp_user_lock_p)user_lock;
2919 }
2920#endif
2921 else {
2922 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2923 }
2924
2925#if USE_ITT_BUILD
2926 __kmp_itt_lock_releasing(lck);
2927#endif /* USE_ITT_BUILD */
2928
2929 RELEASE_LOCK(lck, gtid);
2930
2931#if OMPT_SUPPORT && OMPT_OPTIONAL
2932 // This is the case, if called from omp_init_lock_with_hint:
2933 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2934 if (!codeptr)
2935 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2936 if (ompt_enabled.ompt_callback_mutex_released) {
2937 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2938 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2939 }
2940#endif
2941
2942#endif // KMP_USE_DYNAMIC_LOCK
2943}
2944
2945/* release the lock */
2946void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2947#if KMP_USE_DYNAMIC_LOCK
2948
2949#if USE_ITT_BUILD
2950 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2951#endif
2952 int release_status =
2953 KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2954 (void)release_status;
2955
2956#if OMPT_SUPPORT && OMPT_OPTIONAL
2957 // This is the case, if called from omp_init_lock_with_hint:
2958 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2959 if (!codeptr)
2960 codeptr = OMPT_GET_RETURN_ADDRESS(0);
2961 if (ompt_enabled.enabled) {
2962 if (release_status == KMP_LOCK_RELEASED) {
2963 if (ompt_enabled.ompt_callback_mutex_released) {
2964 // release_lock_last
2965 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2966 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2967 codeptr);
2968 }
2969 } else if (ompt_enabled.ompt_callback_nest_lock) {
2970 // release_lock_prev
2971 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2972 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2973 }
2974 }
2975#endif
2976
2977#else // KMP_USE_DYNAMIC_LOCK
2978
2979 kmp_user_lock_p lck;
2980
2981 /* Can't use serial interval since not block structured */
2982
2983 if ((__kmp_user_lock_kind == lk_tas) &&
2984 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2985 OMP_NEST_LOCK_T_SIZE)) {
2986#if KMP_OS_LINUX && \
2987 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2988 // "fast" path implemented to fix customer performance issue
2989 kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2990#if USE_ITT_BUILD
2991 __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2992#endif /* USE_ITT_BUILD */
2993
2994#if OMPT_SUPPORT && OMPT_OPTIONAL
2995 int release_status = KMP_LOCK_STILL_HELD;
2996#endif
2997
2998 if (--(tl->lk.depth_locked) == 0) {
2999 TCW_4(tl->lk.poll, 0);
3000#if OMPT_SUPPORT && OMPT_OPTIONAL
3001 release_status = KMP_LOCK_RELEASED;
3002#endif
3003 }
3004 KMP_MB();
3005
3006#if OMPT_SUPPORT && OMPT_OPTIONAL
3007 // This is the case, if called from omp_init_lock_with_hint:
3008 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3009 if (!codeptr)
3010 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3011 if (ompt_enabled.enabled) {
3012 if (release_status == KMP_LOCK_RELEASED) {
3013 if (ompt_enabled.ompt_callback_mutex_released) {
3014 // release_lock_last
3015 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3016 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3017 }
3018 } else if (ompt_enabled.ompt_callback_nest_lock) {
3019 // release_lock_previous
3020 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3021 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3022 }
3023 }
3024#endif
3025
3026 return;
3027#else
3028 lck = (kmp_user_lock_p)user_lock;
3029#endif
3030 }
3031#if KMP_USE_FUTEX
3032 else if ((__kmp_user_lock_kind == lk_futex) &&
3033 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3034 OMP_NEST_LOCK_T_SIZE)) {
3035 lck = (kmp_user_lock_p)user_lock;
3036 }
3037#endif
3038 else {
3039 lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3040 }
3041
3042#if USE_ITT_BUILD
3043 __kmp_itt_lock_releasing(lck);
3044#endif /* USE_ITT_BUILD */
3045
3046 int release_status;
3047 release_status = RELEASE_NESTED_LOCK(lck, gtid);
3048#if OMPT_SUPPORT && OMPT_OPTIONAL
3049 // This is the case, if called from omp_init_lock_with_hint:
3050 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3051 if (!codeptr)
3052 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3053 if (ompt_enabled.enabled) {
3054 if (release_status == KMP_LOCK_RELEASED) {
3055 if (ompt_enabled.ompt_callback_mutex_released) {
3056 // release_lock_last
3057 ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3058 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3059 }
3060 } else if (ompt_enabled.ompt_callback_nest_lock) {
3061 // release_lock_previous
3062 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3063 ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3064 }
3065 }
3066#endif
3067
3068#endif // KMP_USE_DYNAMIC_LOCK
3069}
3070
3071/* try to acquire the lock */
3072int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3073 KMP_COUNT_BLOCK(OMP_test_lock);
3074
3075#if KMP_USE_DYNAMIC_LOCK
3076 int rc;
3077 int tag = KMP_EXTRACT_D_TAG(user_lock);
3078#if USE_ITT_BUILD
3079 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3080#endif
3081#if OMPT_SUPPORT && OMPT_OPTIONAL
3082 // This is the case, if called from omp_init_lock_with_hint:
3083 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3084 if (!codeptr)
3085 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3086 if (ompt_enabled.ompt_callback_mutex_acquire) {
3087 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3088 ompt_mutex_lock, omp_lock_hint_none,
3089 __ompt_get_mutex_impl_type(user_lock),
3090 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3091 }
3092#endif
3093#if KMP_USE_INLINED_TAS
3094 if (tag == locktag_tas && !__kmp_env_consistency_check) {
3095 KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3096 } else
3097#elif KMP_USE_INLINED_FUTEX
3098 if (tag == locktag_futex && !__kmp_env_consistency_check) {
3099 KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3100 } else
3101#endif
3102 {
3103 rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3104 }
3105 if (rc) {
3106#if USE_ITT_BUILD
3107 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3108#endif
3109#if OMPT_SUPPORT && OMPT_OPTIONAL
3110 if (ompt_enabled.ompt_callback_mutex_acquired) {
3111 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3112 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3113 }
3114#endif
3115 return FTN_TRUE;
3116 } else {
3117#if USE_ITT_BUILD
3118 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3119#endif
3120 return FTN_FALSE;
3121 }
3122
3123#else // KMP_USE_DYNAMIC_LOCK
3124
3125 kmp_user_lock_p lck;
3126 int rc;
3127
3128 if ((__kmp_user_lock_kind == lk_tas) &&
3129 (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3130 lck = (kmp_user_lock_p)user_lock;
3131 }
3132#if KMP_USE_FUTEX
3133 else if ((__kmp_user_lock_kind == lk_futex) &&
3134 (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3135 lck = (kmp_user_lock_p)user_lock;
3136 }
3137#endif
3138 else {
3139 lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3140 }
3141
3142#if USE_ITT_BUILD
3143 __kmp_itt_lock_acquiring(lck);
3144#endif /* USE_ITT_BUILD */
3145#if OMPT_SUPPORT && OMPT_OPTIONAL
3146 // This is the case, if called from omp_init_lock_with_hint:
3147 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3148 if (!codeptr)
3149 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3150 if (ompt_enabled.ompt_callback_mutex_acquire) {
3151 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3152 ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3153 (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3154 }
3155#endif
3156
3157 rc = TEST_LOCK(lck, gtid);
3158#if USE_ITT_BUILD
3159 if (rc) {
3160 __kmp_itt_lock_acquired(lck);
3161 } else {
3162 __kmp_itt_lock_cancelled(lck);
3163 }
3164#endif /* USE_ITT_BUILD */
3165#if OMPT_SUPPORT && OMPT_OPTIONAL
3166 if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3167 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3168 ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3169 }
3170#endif
3171
3172 return (rc ? FTN_TRUE : FTN_FALSE);
3173
3174 /* Can't use serial interval since not block structured */
3175
3176#endif // KMP_USE_DYNAMIC_LOCK
3177}
3178
3179/* try to acquire the lock */
3180int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3181#if KMP_USE_DYNAMIC_LOCK
3182 int rc;
3183#if USE_ITT_BUILD
3184 __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3185#endif
3186#if OMPT_SUPPORT && OMPT_OPTIONAL
3187 // This is the case, if called from omp_init_lock_with_hint:
3188 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3189 if (!codeptr)
3190 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3191 if (ompt_enabled.ompt_callback_mutex_acquire) {
3192 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3193 ompt_mutex_nest_lock, omp_lock_hint_none,
3194 __ompt_get_mutex_impl_type(user_lock),
3195 (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3196 }
3197#endif
3198 rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3199#if USE_ITT_BUILD
3200 if (rc) {
3201 __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3202 } else {
3203 __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3204 }
3205#endif
3206#if OMPT_SUPPORT && OMPT_OPTIONAL
3207 if (ompt_enabled.enabled && rc) {
3208 if (rc == 1) {
3209 if (ompt_enabled.ompt_callback_mutex_acquired) {
3210 // lock_first
3211 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3212 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3213 codeptr);
3214 }
3215 } else {
3216 if (ompt_enabled.ompt_callback_nest_lock) {
3217 // lock_next
3218 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3219 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3220 }
3221 }
3222 }
3223#endif
3224 return rc;
3225
3226#else // KMP_USE_DYNAMIC_LOCK
3227
3228 kmp_user_lock_p lck;
3229 int rc;
3230
3231 if ((__kmp_user_lock_kind == lk_tas) &&
3232 (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3233 OMP_NEST_LOCK_T_SIZE)) {
3234 lck = (kmp_user_lock_p)user_lock;
3235 }
3236#if KMP_USE_FUTEX
3237 else if ((__kmp_user_lock_kind == lk_futex) &&
3238 (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3239 OMP_NEST_LOCK_T_SIZE)) {
3240 lck = (kmp_user_lock_p)user_lock;
3241 }
3242#endif
3243 else {
3244 lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3245 }
3246
3247#if USE_ITT_BUILD
3248 __kmp_itt_lock_acquiring(lck);
3249#endif /* USE_ITT_BUILD */
3250
3251#if OMPT_SUPPORT && OMPT_OPTIONAL
3252 // This is the case, if called from omp_init_lock_with_hint:
3253 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3254 if (!codeptr)
3255 codeptr = OMPT_GET_RETURN_ADDRESS(0);
3256 if (ompt_enabled.enabled &&
3257     ompt_enabled.ompt_callback_mutex_acquire) {
3258 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3259 ompt_mutex_nest_lock, omp_lock_hint_none,
3260 __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3261 codeptr);
3262 }
3263#endif
3264
3265 rc = TEST_NESTED_LOCK(lck, gtid);
3266#if USE_ITT_BUILD
3267 if (rc) {
3268 __kmp_itt_lock_acquired(lck);
3269 } else {
3270 __kmp_itt_lock_cancelled(lck);
3271 }
3272#endif /* USE_ITT_BUILD */
3273#if OMPT_SUPPORT && OMPT_OPTIONAL
3274 if (ompt_enabled.enabled && rc) {
3275 if (rc == 1) {
3276 if (ompt_enabled.ompt_callback_mutex_acquired) {
3277 // lock_first
3278 ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3279 ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3280 }
3281 } else {
3282 if (ompt_enabled.ompt_callback_nest_lock) {
3283 // lock_next
3284 ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3285 ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3286 }
3287 }
3288 }
3289#endif
3290 return rc;
3291
3292 /* Can't use serial interval since not block structured */
3293
3294#endif // KMP_USE_DYNAMIC_LOCK
3295}
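// Illustration (not part of the runtime): omp_test_nest_lock() returns the
// new nesting count on success and 0 on failure, which is what the OMPT code
// above distinguishes (rc == 1: first acquisition, rc > 1: re-acquisition by
// the owning thread):
//
//   omp_nest_lock_t nl;
//   omp_init_nest_lock(&nl);
//   int d1 = omp_test_nest_lock(&nl); // d1 == 1: acquired, depth 1
//   int d2 = omp_test_nest_lock(&nl); // d2 == 2: same owner, depth 2
//   omp_unset_nest_lock(&nl);         // depth 2 -> 1
//   omp_unset_nest_lock(&nl);         // depth 1 -> 0, lock released
//   omp_destroy_nest_lock(&nl);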
3296
3297// Interface to fast scalable reduce methods routines
3298
3299// keep the selected method in a thread-local structure for cross-function
3300// use: it will be read again in the __kmpc_end_reduce* functions;
3301// an alternative would be to re-determine the method in the
3302// __kmpc_end_reduce* functions (a new prototype would be required then)
3303// AT: which solution is better?
3304#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3305 ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3306
3307#define __KMP_GET_REDUCTION_METHOD(gtid) \
3308 (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3309
3310// description of the packed_reduction_method variable: look at the macros in
3311// kmp.h
3312
3313// used in a critical section reduce block
3314static __forceinline void
3315__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3316 kmp_critical_name *crit) {
3317
3318 // this lock was visible to a customer and to the threading profile tool as a
3319 // serial overhead span (although it's used for an internal purpose only)
3320 // why was it visible in previous implementation?
3321 // should we keep it visible in new reduce block?
3322 kmp_user_lock_p lck;
3323
3324#if KMP_USE_DYNAMIC_LOCK
3325
3326 kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3327 // Check if it is initialized.
3328 if (*lk == 0) {
3329 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3330 KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3331 KMP_GET_D_TAG(__kmp_user_lock_seq));
3332 } else {
3333 __kmp_init_indirect_csptr(crit, loc, global_tid,
3334 KMP_GET_I_TAG(__kmp_user_lock_seq));
3335 }
3336 }
3337 // Branch for accessing the actual lock object and set operation. This
3338 // branching is inevitable since this lock initialization does not follow the
3339 // normal dispatch path (lock table is not used).
3340 if (KMP_EXTRACT_D_TAG(lk) != 0) {
3341 lck = (kmp_user_lock_p)lk;
3342 KMP_DEBUG_ASSERT(lck != NULL);
3343 if (__kmp_env_consistency_check) {
3344 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3345 }
3346 KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3347 } else {
3348 kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3349 lck = ilk->lock;
3350 KMP_DEBUG_ASSERT(lck != NULL);
3351 if (__kmp_env_consistency_check) {
3352 __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3353 }
3354 KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3355 }
3356
3357#else // KMP_USE_DYNAMIC_LOCK
3358
3359 // We know that the fast reduction code is only emitted by Intel compilers
3360 // with 32 byte critical sections. If there isn't enough space, then we
3361 // have to use a pointer.
3362 if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3363 lck = (kmp_user_lock_p)crit;
3364 } else {
3365 lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3366 }
3367 KMP_DEBUG_ASSERT(lck != NULL);
3368
3369 if (__kmp_env_consistency_check)
3370 __kmp_push_sync(global_tid, ct_critical, loc, lck);
3371
3372 __kmp_acquire_user_lock_with_checks(lck, global_tid);
3373
3374#endif // KMP_USE_DYNAMIC_LOCK
3375}
3376
3377// used in a critical section reduce block
3378static __forceinline void
3379__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3380 kmp_critical_name *crit) {
3381
3382 kmp_user_lock_p lck;
3383
3384#if KMP_USE_DYNAMIC_LOCK
3385
3386 if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3387 lck = (kmp_user_lock_p)crit;
3388 if (__kmp_env_consistency_check)
3389 __kmp_pop_sync(global_tid, ct_critical, loc);
3390 KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3391 } else {
3392 kmp_indirect_lock_t *ilk =
3393 (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3394 if (__kmp_env_consistency_check)
3395 __kmp_pop_sync(global_tid, ct_critical, loc);
3396 KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3397 }
3398
3399#else // KMP_USE_DYNAMIC_LOCK
3400
3401 // We know that the fast reduction code is only emitted by Intel compilers
3402 // with 32 byte critical sections. If there isn't enough space, then we have
3403 // to use a pointer.
3404 if (__kmp_base_user_lock_size > 32) {
3405 lck = *((kmp_user_lock_p *)crit);
3406 KMP_ASSERT(lck != NULL);
3407 } else {
3408 lck = (kmp_user_lock_p)crit;
3409 }
3410
3411 if (__kmp_env_consistency_check)
3412 __kmp_pop_sync(global_tid, ct_critical, loc);
3413
3414 __kmp_release_user_lock_with_checks(lck, global_tid);
3415
3416#endif // KMP_USE_DYNAMIC_LOCK
3417} // __kmp_end_critical_section_reduce_block
3418
3419static __forceinline int
3420__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3421 int *task_state) {
3422 kmp_team_t *team;
3423
3424 // Are we inside a teams construct?
3425 if (th->th.th_teams_microtask) {
3426 *team_p = team = th->th.th_team;
3427 if (team->t.t_level == th->th.th_teams_level) {
3428 // This is reduction at teams construct.
3429 KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3430 // Let's swap teams temporarily for the reduction.
3431 th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3432 th->th.th_team = team->t.t_parent;
3433 th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3434 th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3435 *task_state = th->th.th_task_state;
3436 th->th.th_task_state = 0;
3437
3438 return 1;
3439 }
3440 }
3441 return 0;
3442}
3443
3444static __forceinline void
3445__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3446 // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3447 th->th.th_info.ds.ds_tid = 0;
3448 th->th.th_team = team;
3449 th->th.th_team_nproc = team->t.t_nproc;
3450 th->th.th_task_team = team->t.t_task_team[task_state];
3451 __kmp_type_convert(task_state, &(th->th.th_task_state));
3452}
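// Illustration (not part of the runtime): the two helpers above handle a
// reduction that occurs directly at a teams construct, e.g. host code of the
// following shape (x and work_for_team() are placeholders):
//
//   int x = 0;
//   #pragma omp teams num_teams(4) reduction(+ : x)
//   x += work_for_team(omp_get_team_num());
//
// The initial thread of each team reaches __kmpc_reduce*() with
// th_teams_microtask set; it is temporarily re-attached to the parent team so
// the cross-team combination can reuse the ordinary reduction path, and is
// restored afterwards.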
3453
3454/* 2.a.i. Reduce Block without a terminating barrier */
3470kmp_int32
3471__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3472 size_t reduce_size, void *reduce_data,
3473 void (*reduce_func)(void *lhs_data, void *rhs_data),
3474 kmp_critical_name *lck) {
3475
3476 KMP_COUNT_BLOCK(REDUCE_nowait);
3477 int retval = 0;
3478 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3479 kmp_info_t *th;
3480 kmp_team_t *team;
3481 int teams_swapped = 0, task_state;
3482 KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3483 __kmp_assert_valid_gtid(global_tid);
3484
3485 // why do we need this initialization here at all?
3486 // The reduction clause cannot be used as a stand-alone directive.
3487
3488 // do not call __kmp_serial_initialize(), it will be called by
3489 // __kmp_parallel_initialize() if needed
3490 // (possible false-positive race detection by the thread checker?)
3491 if (!TCR_4(__kmp_init_parallel))
3492 __kmp_parallel_initialize();
3493
3494 __kmp_resume_if_soft_paused();
3495
3496// check correctness of reduce block nesting
3497#if KMP_USE_DYNAMIC_LOCK
3498 if (__kmp_env_consistency_check)
3499 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3500#else
3501 if (__kmp_env_consistency_check)
3502 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3503#endif
3504
3505 th = __kmp_thread_from_gtid(global_tid);
3506 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3507
3508 // The packed_reduction_method value will be reused by the __kmpc_end_reduce*
3509 // functions, so it must be kept in a variable.
3510 // The variable should be a construct-specific or thread-specific property,
3511 // not a team-specific one (a thread can reach the next reduce block on the
3512 // next construct, and the reduce method may differ there).
3513 // The ident_t "loc" parameter could serve as a construct-specific property,
3514 // but loc may be 0, and sharing either a construct-specific or a
3515 // team-specific variable would require unnecessary extra synchronization.
3516 // A thread-specific variable avoids both issues (the next construct and the
3517 // extra syncs), so the thread-specific th_local.packed_reduction_method
3518 // field is used: each thread executes the 'determine' and 'set' lines
3519 // itself (there is no need to have a single thread do it, and doing it
3520 // per-thread avoids unnecessary extra syncs).
3523
3524 packed_reduction_method = __kmp_determine_reduction_method(
3525 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3526 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3527
3528 OMPT_REDUCTION_DECL(th, global_tid);
3529 if (packed_reduction_method == critical_reduce_block) {
3530
3531 OMPT_REDUCTION_BEGIN;
3532
3533 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3534 retval = 1;
3535
3536 } else if (packed_reduction_method == empty_reduce_block) {
3537
3538 OMPT_REDUCTION_BEGIN;
3539
3540 // usage: if team size == 1, no synchronization is required ( Intel
3541 // platforms only )
3542 retval = 1;
3543
3544 } else if (packed_reduction_method == atomic_reduce_block) {
3545
3546 retval = 2;
3547
3548 // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3549 // won't be called by the code gen)
3550 // (this is not ideal: the checking block has already been closed by this
3551 // 'pop', while the atomic operation has not been executed yet; it will
3552 // execute slightly later, literally on the next instruction)
3554 if (__kmp_env_consistency_check)
3555 __kmp_pop_sync(global_tid, ct_reduce, loc);
3556
3557 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3558 tree_reduce_block)) {
3559
3560// AT: performance issue: a real barrier here
3561// AT: (if the primary thread is slow, the other threads are blocked here,
3562// waiting for the primary thread to come and release them)
3563// AT: (this is not what a customer might expect when specifying NOWAIT)
3564// AT: (specifying NOWAIT won't improve performance here; it will just be
3565// confusing to a customer)
3566// AT: another implementation of *barrier_gather*nowait() (or some other
3567// design) might be faster and more in line with the sense of NOWAIT
3568// AT: TO DO: run an EPCC test and compare times
3569
3570// this barrier should be invisible to a customer and to the threading profile
3571// tool (it's neither a terminating barrier nor customer's code, it's
3572// used for an internal purpose)
3573#if OMPT_SUPPORT
3574 // JP: can this barrier potentially lead to task scheduling?
3575 // JP: as long as there is a barrier in the implementation, OMPT should and
3576 // will provide the barrier events,
3577 // so we set up the necessary frame/return addresses.
3578 ompt_frame_t *ompt_frame;
3579 if (ompt_enabled.enabled) {
3580 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3581 if (ompt_frame->enter_frame.ptr == NULL)
3582 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3583 }
3584 OMPT_STORE_RETURN_ADDRESS(global_tid);
3585#endif
3586#if USE_ITT_NOTIFY
3587 __kmp_threads[global_tid]->th.th_ident = loc;
3588#endif
3589 retval =
3590 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3591 global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3592 retval = (retval != 0) ? (0) : (1);
3593#if OMPT_SUPPORT && OMPT_OPTIONAL
3594 if (ompt_enabled.enabled) {
3595 ompt_frame->enter_frame = ompt_data_none;
3596 }
3597#endif
3598
3599 // all workers except the primary thread should do this pop here
3600 // (none of the other workers will get to __kmpc_end_reduce_nowait())
3601 if (__kmp_env_consistency_check) {
3602 if (retval == 0) {
3603 __kmp_pop_sync(global_tid, ct_reduce, loc);
3604 }
3605 }
3606
3607 } else {
3608
3609 // should never reach this block
3610 KMP_ASSERT(0); // "unexpected method"
3611 }
3612 if (teams_swapped) {
3613 __kmp_restore_swapped_teams(th, team, task_state);
3614 }
3615 KA_TRACE(
3616 10,
3617 ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3618 global_tid, packed_reduction_method, retval));
3619
3620 return retval;
3621}
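// Illustration (a sketch, not actual compiler output): the return-value
// contract above as consumed by code generated for "reduction(+ : sum)
// nowait". local_sum, global_sum, reduce_func and crit_name are placeholders:
//
//   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(local_sum), &local_sum,
//                                reduce_func, &crit_name)) {
//   case 1: // this thread combines and must close the reduce block
//     global_sum += local_sum;
//     __kmpc_end_reduce_nowait(loc, gtid, &crit_name);
//     break;
//   case 2: // atomic_reduce_block: update atomically, no end call
//     #pragma omp atomic
//     global_sum += local_sum;
//     break;
//   default: // 0: another thread already produced the combined result
//     break;
//   }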
3622
3631void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3632 kmp_critical_name *lck) {
3633
3634 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3635
3636 KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3637 __kmp_assert_valid_gtid(global_tid);
3638
3639 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3640
3641 OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3642
3643 if (packed_reduction_method == critical_reduce_block) {
3644
3645 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3646 OMPT_REDUCTION_END;
3647
3648 } else if (packed_reduction_method == empty_reduce_block) {
3649
3650 // usage: if team size == 1, no synchronization is required ( on Intel
3651 // platforms only )
3652
3653 OMPT_REDUCTION_END;
3654
3655 } else if (packed_reduction_method == atomic_reduce_block) {
3656
3657 // neither the primary thread nor the other workers should get here
3658 // (code gen does not generate this call in case 2: atomic reduce block)
3659 // it would actually be better to remove this else-if entirely; after
3660 // removal this value would be caught by the 'else' branch and assert
3661
3662 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3663 tree_reduce_block)) {
3664
3665 // only primary thread gets here
3666 // OMPT: tree reduction is annotated in the barrier code
3667
3668 } else {
3669
3670 // should never reach this block
3671 KMP_ASSERT(0); // "unexpected method"
3672 }
3673
3674 if (__kmp_env_consistency_check)
3675 __kmp_pop_sync(global_tid, ct_reduce, loc);
3676
3677 KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3678 global_tid, packed_reduction_method));
3679
3680 return;
3681}
3682
3683/* 2.a.ii. Reduce Block with a terminating barrier */
3684
3700kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3701 size_t reduce_size, void *reduce_data,
3702 void (*reduce_func)(void *lhs_data, void *rhs_data),
3703 kmp_critical_name *lck) {
3704 KMP_COUNT_BLOCK(REDUCE_wait);
3705 int retval = 0;
3706 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3707 kmp_info_t *th;
3708 kmp_team_t *team;
3709 int teams_swapped = 0, task_state;
3710
3711 KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3712 __kmp_assert_valid_gtid(global_tid);
3713
3714 // why do we need this initialization here at all?
3715 // The reduction clause cannot be a stand-alone directive.
3716
3717 // do not call __kmp_serial_initialize(), it will be called by
3718 // __kmp_parallel_initialize() if needed
3719 // (possible false-positive race detection by the thread checker?)
3720 if (!TCR_4(__kmp_init_parallel))
3721 __kmp_parallel_initialize();
3722
3723 __kmp_resume_if_soft_paused();
3724
3725// check correctness of reduce block nesting
3726#if KMP_USE_DYNAMIC_LOCK
3727 if (__kmp_env_consistency_check)
3728 __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3729#else
3730 if (__kmp_env_consistency_check)
3731 __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3732#endif
3733
3734 th = __kmp_thread_from_gtid(global_tid);
3735 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3736
3737 packed_reduction_method = __kmp_determine_reduction_method(
3738 loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3739 __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3740
3741 OMPT_REDUCTION_DECL(th, global_tid);
3742
3743 if (packed_reduction_method == critical_reduce_block) {
3744
3745 OMPT_REDUCTION_BEGIN;
3746 __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3747 retval = 1;
3748
3749 } else if (packed_reduction_method == empty_reduce_block) {
3750
3751 OMPT_REDUCTION_BEGIN;
3752 // usage: if team size == 1, no synchronization is required ( Intel
3753 // platforms only )
3754 retval = 1;
3755
3756 } else if (packed_reduction_method == atomic_reduce_block) {
3757
3758 retval = 2;
3759
3760 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3761 tree_reduce_block)) {
3762
3763// case tree_reduce_block:
3764// this barrier should be visible to a customer and to the threading profile
3765// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3766#if OMPT_SUPPORT
3767 ompt_frame_t *ompt_frame;
3768 if (ompt_enabled.enabled) {
3769 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3770 if (ompt_frame->enter_frame.ptr == NULL)
3771 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3772 }
3773 OMPT_STORE_RETURN_ADDRESS(global_tid);
3774#endif
3775#if USE_ITT_NOTIFY
3776 __kmp_threads[global_tid]->th.th_ident =
3777 loc; // needed for correct notification of frames
3778#endif
3779 retval =
3780 __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3781 global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3782 retval = (retval != 0) ? (0) : (1);
3783#if OMPT_SUPPORT && OMPT_OPTIONAL
3784 if (ompt_enabled.enabled) {
3785 ompt_frame->enter_frame = ompt_data_none;
3786 }
3787#endif
3788
3789 // all workers except the primary thread should do this pop here
3790 // (workers other than the primary will not enter __kmpc_end_reduce())
3791 if (__kmp_env_consistency_check) {
3792 if (retval == 0) { // 0: all other workers; 1: primary thread
3793 __kmp_pop_sync(global_tid, ct_reduce, loc);
3794 }
3795 }
3796
3797 } else {
3798
3799 // should never reach this block
3800 KMP_ASSERT(0); // "unexpected method"
3801 }
3802 if (teams_swapped) {
3803 __kmp_restore_swapped_teams(th, team, task_state);
3804 }
3805
3806 KA_TRACE(10,
3807 ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3808 global_tid, packed_reduction_method, retval));
3809 return retval;
3810}
3811
3822void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3823 kmp_critical_name *lck) {
3824
3825 PACKED_REDUCTION_METHOD_T packed_reduction_method;
3826 kmp_info_t *th;
3827 kmp_team_t *team;
3828 int teams_swapped = 0, task_state;
3829
3830 KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3831 __kmp_assert_valid_gtid(global_tid);
3832
3833 th = __kmp_thread_from_gtid(global_tid);
3834 teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3835
3836 packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3837
3838 // this barrier should be visible to a customer and to the threading profile
3839 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3840 OMPT_REDUCTION_DECL(th, global_tid);
3841
3842 if (packed_reduction_method == critical_reduce_block) {
3843 __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3844
3845 OMPT_REDUCTION_END;
3846
3847// TODO: implicit barrier: should be exposed
3848#if OMPT_SUPPORT
3849 ompt_frame_t *ompt_frame;
3850 if (ompt_enabled.enabled) {
3851 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3852 if (ompt_frame->enter_frame.ptr == NULL)
3853 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3854 }
3855 OMPT_STORE_RETURN_ADDRESS(global_tid);
3856#endif
3857#if USE_ITT_NOTIFY
3858 __kmp_threads[global_tid]->th.th_ident = loc;
3859#endif
3860 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3861#if OMPT_SUPPORT && OMPT_OPTIONAL
3862 if (ompt_enabled.enabled) {
3863 ompt_frame->enter_frame = ompt_data_none;
3864 }
3865#endif
3866
3867 } else if (packed_reduction_method == empty_reduce_block) {
3868
3869 OMPT_REDUCTION_END;
3870
3871// usage: if team size==1, no synchronization is required (Intel platforms only)
3872
3873// TODO: implicit barrier: should be exposed
3874#if OMPT_SUPPORT
3875 ompt_frame_t *ompt_frame;
3876 if (ompt_enabled.enabled) {
3877 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3878 if (ompt_frame->enter_frame.ptr == NULL)
3879 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3880 }
3881 OMPT_STORE_RETURN_ADDRESS(global_tid);
3882#endif
3883#if USE_ITT_NOTIFY
3884 __kmp_threads[global_tid]->th.th_ident = loc;
3885#endif
3886 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3887#if OMPT_SUPPORT && OMPT_OPTIONAL
3888 if (ompt_enabled.enabled) {
3889 ompt_frame->enter_frame = ompt_data_none;
3890 }
3891#endif
3892
3893 } else if (packed_reduction_method == atomic_reduce_block) {
3894
3895#if OMPT_SUPPORT
3896 ompt_frame_t *ompt_frame;
3897 if (ompt_enabled.enabled) {
3898 __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3899 if (ompt_frame->enter_frame.ptr == NULL)
3900 ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3901 }
3902 OMPT_STORE_RETURN_ADDRESS(global_tid);
3903#endif
3904// TODO: implicit barrier: should be exposed
3905#if USE_ITT_NOTIFY
3906 __kmp_threads[global_tid]->th.th_ident = loc;
3907#endif
3908 __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3909#if OMPT_SUPPORT && OMPT_OPTIONAL
3910 if (ompt_enabled.enabled) {
3911 ompt_frame->enter_frame = ompt_data_none;
3912 }
3913#endif
3914
3915 } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3916 tree_reduce_block)) {
3917
3918 // only primary thread executes here (primary releases all other workers)
3919 __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3920 global_tid);
3921
3922 } else {
3923
3924 // should never reach this block
3925 KMP_ASSERT(0); // "unexpected method"
3926 }
3927 if (teams_swapped) {
3928 __kmp_restore_swapped_teams(th, team, task_state);
3929 }
3930
3931 if (__kmp_env_consistency_check)
3932 __kmp_pop_sync(global_tid, ct_reduce, loc);
3933
3934 KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3935 global_tid, packed_reduction_method));
3936
3937 return;
3938}
3939
3940#undef __KMP_GET_REDUCTION_METHOD
3941#undef __KMP_SET_REDUCTION_METHOD
3942
3943/* end of interface to fast scalable reduce routines */
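/* Usage sketch: roughly how a compiler might lower
       #pragma omp parallel for reduction(+ : sum)
   onto the blocking entry points above. The names `partial`, `shared_sum` and
   `red_lock` are placeholders, and real code generators emit an equivalent
   but not identical sequence.

     static void red_add(void *lhs, void *rhs) { // reduce_func
       *(int *)lhs += *(int *)rhs; // combine two partial results
     }
     static kmp_critical_name red_lock; // lock used by the critical method

     // ... inside the outlined parallel function, once the loop body has
     // accumulated into the thread-private `partial`:
     int ret = __kmpc_reduce(loc, gtid, 1, sizeof(int), &partial, red_add,
                             &red_lock); // num_vars == 1
     switch (ret) {
     case 1: // this thread combines into the shared variable itself
       shared_sum += partial;
       __kmpc_end_reduce(loc, gtid, &red_lock);
       break;
     case 2: // combine with an atomic update; the terminating barrier is
             // still taken inside __kmpc_end_reduce()
       __atomic_fetch_add(&shared_sum, partial, __ATOMIC_RELAXED);
       __kmpc_end_reduce(loc, gtid, &red_lock);
       break;
     default: // 0: nothing left to do (e.g. a worker of a tree reduction)
       break;
     }
*/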
3944
3945kmp_uint64 __kmpc_get_taskid() {
3946
3947 kmp_int32 gtid;
3948 kmp_info_t *thread;
3949
3950 gtid = __kmp_get_gtid();
3951 if (gtid < 0) {
3952 return 0;
3953 }
3954 thread = __kmp_thread_from_gtid(gtid);
3955 return thread->th.th_current_task->td_task_id;
3956
3957} // __kmpc_get_taskid
3958
3959kmp_uint64 __kmpc_get_parent_taskid() {
3960
3961 kmp_int32 gtid;
3962 kmp_info_t *thread;
3963 kmp_taskdata_t *parent_task;
3964
3965 gtid = __kmp_get_gtid();
3966 if (gtid < 0) {
3967 return 0;
3968 }
3969 thread = __kmp_thread_from_gtid(gtid);
3970 parent_task = thread->th.th_current_task->td_parent;
3971 return (parent_task == NULL ? 0 : parent_task->td_task_id);
3972
3973} // __kmpc_get_parent_taskid
3974
3986void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3987 const struct kmp_dim *dims) {
3988 __kmp_assert_valid_gtid(gtid);
3989 int j, idx;
3990 kmp_int64 last, trace_count;
3991 kmp_info_t *th = __kmp_threads[gtid];
3992 kmp_team_t *team = th->th.th_team;
3993 kmp_uint32 *flags;
3994 kmp_disp_t *pr_buf = th->th.th_dispatch;
3995 dispatch_shared_info_t *sh_buf;
3996
3997 KA_TRACE(
3998 20,
3999 ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4000 gtid, num_dims, !team->t.t_serialized));
4001 KMP_DEBUG_ASSERT(dims != NULL);
4002 KMP_DEBUG_ASSERT(num_dims > 0);
4003
4004 if (team->t.t_serialized) {
4005 KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4006 return; // no dependencies if team is serialized
4007 }
4008 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4009 idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4010 // the next loop
4011 sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4012
4013 // Save bounds info into allocated private buffer
4014 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4015 pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4016 th, sizeof(kmp_int64) * (4 * num_dims + 1));
4017 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4018 pr_buf->th_doacross_info[0] =
4019 (kmp_int64)num_dims; // first element is number of dimensions
4020 // Save also address of num_done in order to access it later without knowing
4021 // the buffer index
4022 pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4023 pr_buf->th_doacross_info[2] = dims[0].lo;
4024 pr_buf->th_doacross_info[3] = dims[0].up;
4025 pr_buf->th_doacross_info[4] = dims[0].st;
4026 last = 5;
4027 for (j = 1; j < num_dims; ++j) {
4028 kmp_int64 range_length; // range of dimension j; ranges of all
4029 // dimensions except the first (dims[0]) are kept in the buffer
4030 if (dims[j].st == 1) { // most common case
4031 // AC: should we care of ranges bigger than LLONG_MAX? (not for now)
4032 range_length = dims[j].up - dims[j].lo + 1;
4033 } else {
4034 if (dims[j].st > 0) {
4035 KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4036 range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4037 } else { // negative increment
4038 KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4039 range_length =
4040 (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4041 }
4042 }
4043 pr_buf->th_doacross_info[last++] = range_length;
4044 pr_buf->th_doacross_info[last++] = dims[j].lo;
4045 pr_buf->th_doacross_info[last++] = dims[j].up;
4046 pr_buf->th_doacross_info[last++] = dims[j].st;
4047 }
4048
4049 // Compute total trip count.
4050 // Start with range of dims[0] which we don't need to keep in the buffer.
4051 if (dims[0].st == 1) { // most common case
4052 trace_count = dims[0].up - dims[0].lo + 1;
4053 } else if (dims[0].st > 0) {
4054 KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4055 trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4056 } else { // negative increment
4057 KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4058 trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4059 }
4060 for (j = 1; j < num_dims; ++j) {
4061 trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4062 }
4063 KMP_DEBUG_ASSERT(trace_count > 0);
4064
4065 // Check whether the shared buffer is still occupied by another loop
4066 // (the one whose buffer index was idx - __kmp_dispatch_num_buffers)
4067 if (idx != sh_buf->doacross_buf_idx) {
4068 // Shared buffer is occupied, wait for it to be free
4069 __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4070 __kmp_eq_4, NULL);
4071 }
4072#if KMP_32_BIT_ARCH
4073 // Check if we are the first thread. After the CAS the first thread gets 0,
4074 // others get 1 if initialization is in progress, allocated pointer otherwise.
4075 // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4076 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4077 (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4078#else
4079 flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4080 (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4081#endif
4082 if (flags == NULL) {
4083 // we are the first thread, allocate the array of flags
4084 size_t size =
4085 (size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4086 flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4087 KMP_MB();
4088 sh_buf->doacross_flags = flags;
4089 } else if (flags == (kmp_uint32 *)1) {
4090#if KMP_32_BIT_ARCH
4091 // initialization is still in progress, need to wait
4092 while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4093#else
4094 while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4095#endif
4096 KMP_YIELD(TRUE);
4097 KMP_MB();
4098 } else {
4099 KMP_MB();
4100 }
4101 KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4102 pr_buf->th_doacross_flags =
4103 sh_buf->doacross_flags; // save a private copy so we do not have to
4104 // touch the shared buffer on each iteration
4105 KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4106}
4107
4108void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4109 __kmp_assert_valid_gtid(gtid);
4110 kmp_int64 shft;
4111 size_t num_dims, i;
4112 kmp_uint32 flag;
4113 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4114 kmp_info_t *th = __kmp_threads[gtid];
4115 kmp_team_t *team = th->th.th_team;
4116 kmp_disp_t *pr_buf;
4117 kmp_int64 lo, up, st;
4118
4119 KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4120 if (team->t.t_serialized) {
4121 KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4122 return; // no dependencies if team is serialized
4123 }
4124
4125 // calculate sequential iteration number and check out-of-bounds condition
4126 pr_buf = th->th.th_dispatch;
4127 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4128 num_dims = (size_t)pr_buf->th_doacross_info[0];
4129 lo = pr_buf->th_doacross_info[2];
4130 up = pr_buf->th_doacross_info[3];
4131 st = pr_buf->th_doacross_info[4];
4132#if OMPT_SUPPORT && OMPT_OPTIONAL
4133 ompt_dependence_t deps[num_dims];
4134#endif
4135 if (st == 1) { // most common case
4136 if (vec[0] < lo || vec[0] > up) {
4137 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4138 "bounds [%lld,%lld]\n",
4139 gtid, vec[0], lo, up));
4140 return;
4141 }
4142 iter_number = vec[0] - lo;
4143 } else if (st > 0) {
4144 if (vec[0] < lo || vec[0] > up) {
4145 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4146 "bounds [%lld,%lld]\n",
4147 gtid, vec[0], lo, up));
4148 return;
4149 }
4150 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4151 } else { // negative increment
4152 if (vec[0] > lo || vec[0] < up) {
4153 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4154 "bounds [%lld,%lld]\n",
4155 gtid, vec[0], lo, up));
4156 return;
4157 }
4158 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4159 }
4160#if OMPT_SUPPORT && OMPT_OPTIONAL
4161 deps[0].variable.value = iter_number;
4162 deps[0].dependence_type = ompt_dependence_type_sink;
4163#endif
4164 for (i = 1; i < num_dims; ++i) {
4165 kmp_int64 iter, ln;
4166 size_t j = i * 4;
4167 ln = pr_buf->th_doacross_info[j + 1];
4168 lo = pr_buf->th_doacross_info[j + 2];
4169 up = pr_buf->th_doacross_info[j + 3];
4170 st = pr_buf->th_doacross_info[j + 4];
4171 if (st == 1) {
4172 if (vec[i] < lo || vec[i] > up) {
4173 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4174 "bounds [%lld,%lld]\n",
4175 gtid, vec[i], lo, up));
4176 return;
4177 }
4178 iter = vec[i] - lo;
4179 } else if (st > 0) {
4180 if (vec[i] < lo || vec[i] > up) {
4181 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4182 "bounds [%lld,%lld]\n",
4183 gtid, vec[i], lo, up));
4184 return;
4185 }
4186 iter = (kmp_uint64)(vec[i] - lo) / st;
4187 } else { // st < 0
4188 if (vec[i] > lo || vec[i] < up) {
4189 KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4190 "bounds [%lld,%lld]\n",
4191 gtid, vec[i], lo, up));
4192 return;
4193 }
4194 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4195 }
4196 iter_number = iter + ln * iter_number;
4197#if OMPT_SUPPORT && OMPT_OPTIONAL
4198 deps[i].variable.value = iter;
4199 deps[i].dependence_type = ompt_dependence_type_sink;
4200#endif
4201 }
4202 shft = iter_number % 32; // use 32-bit granularity
4203 iter_number >>= 5; // divided by 32
4204 flag = 1 << shft;
4205 while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4206 KMP_YIELD(TRUE);
4207 }
4208 KMP_MB();
4209#if OMPT_SUPPORT && OMPT_OPTIONAL
4210 if (ompt_enabled.ompt_callback_dependences) {
4211 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4212 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4213 }
4214#endif
4215 KA_TRACE(20,
4216 ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4217 gtid, (iter_number << 5) + shft));
4218}
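/* Worked example: for a two-dimensional doacross nest with unit strides and a
   kept inner range ln1, the sink vector (v0, v1) linearizes above as
       iter_number = (v1 - lo1) + ln1 * (v0 - lo0),
   i.e. row-major order over the collapsed iteration space. Linear iteration
   70, for instance, maps to bit 70 % 32 == 6 of flags word 70 / 32 == 2,
   which is exactly the shft / iter_number pair computed before spinning. */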
4219
4220void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4221 __kmp_assert_valid_gtid(gtid);
4222 kmp_int64 shft;
4223 size_t num_dims, i;
4224 kmp_uint32 flag;
4225 kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4226 kmp_info_t *th = __kmp_threads[gtid];
4227 kmp_team_t *team = th->th.th_team;
4228 kmp_disp_t *pr_buf;
4229 kmp_int64 lo, st;
4230
4231 KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4232 if (team->t.t_serialized) {
4233 KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4234 return; // no dependencies if team is serialized
4235 }
4236
4237 // calculate sequential iteration number (same as in "wait" but no
4238 // out-of-bounds checks)
4239 pr_buf = th->th.th_dispatch;
4240 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4241 num_dims = (size_t)pr_buf->th_doacross_info[0];
4242 lo = pr_buf->th_doacross_info[2];
4243 st = pr_buf->th_doacross_info[4];
4244#if OMPT_SUPPORT && OMPT_OPTIONAL
4245 ompt_dependence_t deps[num_dims];
4246#endif
4247 if (st == 1) { // most common case
4248 iter_number = vec[0] - lo;
4249 } else if (st > 0) {
4250 iter_number = (kmp_uint64)(vec[0] - lo) / st;
4251 } else { // negative increment
4252 iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4253 }
4254#if OMPT_SUPPORT && OMPT_OPTIONAL
4255 deps[0].variable.value = iter_number;
4256 deps[0].dependence_type = ompt_dependence_type_source;
4257#endif
4258 for (i = 1; i < num_dims; ++i) {
4259 kmp_int64 iter, ln;
4260 size_t j = i * 4;
4261 ln = pr_buf->th_doacross_info[j + 1];
4262 lo = pr_buf->th_doacross_info[j + 2];
4263 st = pr_buf->th_doacross_info[j + 4];
4264 if (st == 1) {
4265 iter = vec[i] - lo;
4266 } else if (st > 0) {
4267 iter = (kmp_uint64)(vec[i] - lo) / st;
4268 } else { // st < 0
4269 iter = (kmp_uint64)(lo - vec[i]) / (-st);
4270 }
4271 iter_number = iter + ln * iter_number;
4272#if OMPT_SUPPORT && OMPT_OPTIONAL
4273 deps[i].variable.value = iter;
4274 deps[i].dependence_type = ompt_dependence_type_source;
4275#endif
4276 }
4277#if OMPT_SUPPORT && OMPT_OPTIONAL
4278 if (ompt_enabled.ompt_callback_dependences) {
4279 ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4280 &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4281 }
4282#endif
4283 shft = iter_number % 32; // use 32-bit granularity
4284 iter_number >>= 5; // divided by 32
4285 flag = 1 << shft;
4286 KMP_MB();
4287 if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4288 KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4289 KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4290 (iter_number << 5) + shft));
4291}
4292
4293void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4294 __kmp_assert_valid_gtid(gtid);
4295 kmp_int32 num_done;
4296 kmp_info_t *th = __kmp_threads[gtid];
4297 kmp_team_t *team = th->th.th_team;
4298 kmp_disp_t *pr_buf = th->th.th_dispatch;
4299
4300 KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4301 if (team->t.t_serialized) {
4302 KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4303 return; // nothing to do
4304 }
4305 num_done =
4306 KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4307 if (num_done == th->th.th_team_nproc) {
4308 // we are the last thread, need to free shared resources
4309 int idx = pr_buf->th_doacross_buf_idx - 1;
4310 dispatch_shared_info_t *sh_buf =
4311 &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4312 KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4313 (kmp_int64)&sh_buf->doacross_num_done);
4314 KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4315 KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4316 __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4317 sh_buf->doacross_flags = NULL;
4318 sh_buf->doacross_num_done = 0;
4319 sh_buf->doacross_buf_idx +=
4320 __kmp_dispatch_num_buffers; // free buffer for future re-use
4321 }
4322 // free private resources (need to keep buffer index forever)
4323 pr_buf->th_doacross_flags = NULL;
4324 __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4325 pr_buf->th_doacross_info = NULL;
4326 KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4327}
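/* Usage sketch: a doacross loop as a user would write it and, in the trailing
   comments, the entry points above that a typical compiler maps it onto; the
   exact lowering varies by compiler and is an assumption here.

     #include <omp.h>
     #define N 1000
     double a[N];

     void prefix_sum(void) {
       #pragma omp parallel for ordered(1)        // -> __kmpc_doacross_init(),
       for (int i = 1; i < N; ++i) {              //    num_dims == 1
         #pragma omp ordered depend(sink : i - 1) // -> __kmpc_doacross_wait()
         a[i] += a[i - 1];
         #pragma omp ordered depend(source)       // -> __kmpc_doacross_post()
       }                                          // -> __kmpc_doacross_fini()
     }

   Each posted iteration sets one bit in the shared flags array (bit iter % 32
   of word iter / 32); a waiter spins on that bit until its sink iteration has
   been posted. */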
4328
4329/* OpenMP 5.1 Memory Management routines */
4330void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4331 return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4332}
4333
4334void *omp_aligned_alloc(size_t align, size_t size,
4335 omp_allocator_handle_t allocator) {
4336 return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4337}
4338
4339void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4340 return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4341}
4342
4343void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4344 omp_allocator_handle_t allocator) {
4345 return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4346}
4347
4348void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4349 omp_allocator_handle_t free_allocator) {
4350 return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4351 free_allocator);
4352}
4353
4354void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4355 ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4356}
4357/* end of OpenMP 5.1 Memory Management routines */
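/* Usage sketch: the entry points above implement the standard allocator API
   declared in <omp.h>; omp_default_mem_alloc is one of the predefined
   allocators, and any handle created with omp_init_allocator() works the
   same way.

     #include <omp.h>
     #include <stdio.h>

     int main(void) {
       // 64-byte aligned, zero-initialized buffer from the default memory space
       double *a = (double *)omp_aligned_calloc(64, 1000, sizeof(double),
                                                omp_default_mem_alloc);
       if (a == NULL)
         return 1; // allocation can fail, depending on the allocator's fallback
       a[0] = 42.0;
       printf("%f\n", a[0]);
       omp_free(a, omp_default_mem_alloc);
       return 0;
     }
*/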
4358
4359int __kmpc_get_target_offload(void) {
4360 if (!__kmp_init_serial) {
4361 __kmp_serial_initialize();
4362 }
4363 return __kmp_target_offload;
4364}
4365
4366int __kmpc_pause_resource(kmp_pause_status_t level) {
4367 if (!__kmp_init_serial) {
4368 return 1; // Can't pause if runtime is not initialized
4369 }
4370 return __kmp_pause_resource(level);
4371}
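/* Usage sketch: the standard omp_pause_resource_all() API from <omp.h>; on
   the host it is expected to funnel into the same __kmp_pause_resource()
   machinery used above, though that exact call path is an assumption, and
   do_phase_one()/do_phase_two() are hypothetical work routines.

     #include <omp.h>

     void quiesce_between_phases(void) {
       #pragma omp parallel
       do_phase_one();

       // Soft pause: release worker threads and other reusable resources; the
       // runtime re-creates them lazily at the next parallel construct.
       omp_pause_resource_all(omp_pause_soft);

       #pragma omp parallel
       do_phase_two();
     }
*/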
4372
4373void __kmpc_error(ident_t *loc, int severity, const char *message) {
4374 if (!__kmp_init_serial)
4375 __kmp_serial_initialize();
4376
4377 KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4378
4379#if OMPT_SUPPORT
4380 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4381 ompt_callbacks.ompt_callback(ompt_callback_error)(
4382 (ompt_severity_t)severity, message, KMP_STRLEN(message),
4383 OMPT_GET_RETURN_ADDRESS(0));
4384 }
4385#endif // OMPT_SUPPORT
4386
4387 char *src_loc;
4388 if (loc && loc->psource) {
4389 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4390 src_loc =
4391 __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4392 __kmp_str_loc_free(&str_loc);
4393 } else {
4394 src_loc = __kmp_str_format("unknown");
4395 }
4396
4397 if (severity == severity_warning)
4398 KMP_WARNING(UserDirectedWarning, src_loc, message);
4399 else
4400 KMP_FATAL(UserDirectedError, src_loc, message);
4401
4402 __kmp_str_free(&src_loc);
4403}
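/* Usage sketch: __kmpc_error() is a natural target for the OpenMP 5.1 `error`
   directive with execution-time semantics; the exact lowering indicated in
   the comment is an assumption.

     void check_input(int n) {
       if (n <= 0) {
         // Diagnosed at run time; a compiler may lower this to something like
         //   __kmpc_error(&loc, severity_warning, "n must be positive");
         #pragma omp error at(execution) severity(warning) message("n must be positive")
       }
     }
*/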
4404
4405// Mark begin of scope directive.
4406void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4407// `reserved` is for future extension of the scope directive and is not used.
4408#if OMPT_SUPPORT && OMPT_OPTIONAL
4409 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4410 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4411 int tid = __kmp_tid_from_gtid(gtid);
4412 ompt_callbacks.ompt_callback(ompt_callback_work)(
4413 ompt_work_scope, ompt_scope_begin,
4414 &(team->t.ompt_team_info.parallel_data),
4415 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4416 OMPT_GET_RETURN_ADDRESS(0));
4417 }
4418#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4419}
4420
4421// Mark end of scope directive
4422void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4423// `reserved` is for future extension of the scope directive and is not used.
4424#if OMPT_SUPPORT && OMPT_OPTIONAL
4425 if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4426 kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4427 int tid = __kmp_tid_from_gtid(gtid);
4428 ompt_callbacks.ompt_callback(ompt_callback_work)(
4429 ompt_work_scope, ompt_scope_end,
4430 &(team->t.ompt_team_info.parallel_data),
4431 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4432 OMPT_GET_RETURN_ADDRESS(0));
4433 }
4434#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4435}
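/* Usage sketch: the OpenMP 5.1 `scope` construct whose begin and end the two
   entry points above report to an OMPT tool; `work()` is a hypothetical
   helper, and whether a given compiler emits these calls when no tool is
   attached is an assumption.

     double total = 0.0;
     #pragma omp parallel shared(total)
     {
       double mine = work(omp_get_thread_num()); // hypothetical per-thread work
       #pragma omp scope reduction(+ : total)    // -> __kmpc_scope()
       {
         total += mine;
       }                                         // -> __kmpc_end_scope()
     }
*/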
4436
4437#ifdef KMP_USE_VERSION_SYMBOLS
4438// For GOMP compatibility there are two versions of each omp_* API.
4439// One is the plain C symbol and one is the Fortran symbol with an appended
4440// underscore. When we implement a specific ompc_* version of an omp_*
4441// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4442// instead of the Fortran versions in kmp_ftn_entry.h
4443extern "C" {
4444// Have to undef these from omp.h so they aren't translated into
4445// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4446#ifdef omp_set_affinity_format
4447#undef omp_set_affinity_format
4448#endif
4449#ifdef omp_get_affinity_format
4450#undef omp_get_affinity_format
4451#endif
4452#ifdef omp_display_affinity
4453#undef omp_display_affinity
4454#endif
4455#ifdef omp_capture_affinity
4456#undef omp_capture_affinity
4457#endif
4458KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4459 "OMP_5.0");
4460KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4461 "OMP_5.0");
4462KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4463 "OMP_5.0");
4464KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4465 "OMP_5.0");
4466} // extern "C"
4467#endif