/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;
/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for 8-byte complex data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e. This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q / rhs.q;
}

static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q / rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD

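// Explanatory note (illustrative, not upstream text): these overloads let the
// generic operation macros below apply "+", "-", "*", "/", "<", ">" directly
// to the alignment-wrapper structs, e.g. inside "(*lhs) = (TYPE)((*lhs) OP
// rhs)". A minimal sketch, assuming Quad_a4_t wraps a _Quad member q and is
// constructible from _Quad as declared in kmp_atomic.h:
//
//   Quad_a4_t a, b;
//   a = a / b; // resolves to operator/(Quad_a4_t &, Quad_a4_t &) above,
//              // which divides the underlying a.q by b.q
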
// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID \
  if (gtid == KMP_GTID_UNKNOWN) { \
    gtid = __kmp_entry_gtid(); \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

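// Illustrative expansion (not upstream text): ATOMIC_BEGIN(fixed4, add,
// kmp_int32, void) expands to roughly
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//
// i.e. an open function definition whose body is completed by the operation
// macros that follow and by the closing "}" at each instantiation site.
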
// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) OP(rhs); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) = (TYPE)((*lhs)OP rhs); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

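// Illustrative expansion (not upstream text): OP_UPDATE_CRITICAL(float, +, 4r)
// expands to a plain lock-protected read-modify-write:
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4r, gtid);
//   (*lhs) = (float)((*lhs) + rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4r, gtid);
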
// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions
// which require a critical section, where we predict that they will be
// implemented in the Gnu codegen by calling GOMP_atomic_start() /
// GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP, 0); \
    return; \
  }

#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

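// Dispatch note (illustrative, not upstream): FLAG is a compile-time constant
// at every instantiation site, so for FLAG == 0 the guard becomes
// "if (0 && ...)" and is removed by dead code elimination, while a nonzero
// FLAG leaves only a runtime test of __kmp_atomic_mode. When that test fires
// (GOMP mode), the update runs under the generic lock (LCK_ID 0, i.e.
// __kmp_atomic_lock) and the routine returns early.
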
#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
#define OP_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE old_value, new_value; \
    old_value = *(TYPE volatile *)lhs; \
    new_value = (TYPE)(old_value OP rhs); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
      old_value = *(TYPE volatile *)lhs; \
      new_value = (TYPE)(old_value OP rhs); \
    } \
  }

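// Orientation sketch (not upstream code): the macro above is the classic
// CAS-retry update. A hedged C++11 equivalent for a 32-bit integer add,
// ignoring the type-punning the macro performs for non-integer TYPEs:
//
//   #include <atomic>
//   void cas_update(std::atomic<kmp_int32> *lhs, kmp_int32 rhs) {
//     kmp_int32 old_value = lhs->load();
//     // compare_exchange_weak reloads old_value on failure, mirroring the
//     // explicit re-read of *lhs in the macro's loop body
//     while (!lhs->compare_exchange_weak(old_value, old_value + rhs)) {
//     }
//   }
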
#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  { \
    struct _sss { \
      TYPE cmp; \
      kmp_int##BITS *vvv; \
    }; \
    struct _sss old_value, new_value; \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
    new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
      KMP_DO_PAUSE; \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
      new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
    } \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

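// Explanatory note (our reading, not upstream text): the _sss struct makes
// every access to the comparand go through the self-pointing vvv member,
// i.e. through a kmp_int##BITS lvalue loaded from a volatile source. That
// extra indirection is presumably what prevents the affected compilers from
// caching *lhs in a register across iterations, which the plain volatile
// temp_val in OP_CMPXCHG failed to achieve.
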
#if KMP_OS_WINDOWS && KMP_ARCH_AARCH64
// Undo explicit type casts to get MSVC ARM64 to build. Uses
// OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
#undef OP_CMPXCHG
#define OP_CMPXCHG(TYPE, BITS, OP) \
  { \
    struct _sss { \
      TYPE cmp; \
      kmp_int##BITS *vvv; \
    }; \
    struct _sss old_value, new_value; \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
    new_value.cmp = old_value.cmp OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
      KMP_DO_PAUSE; \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
      new_value.cmp = old_value.cmp OP rhs; \
    } \
  }

#undef OP_UPDATE_CRITICAL
#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) = (*lhs)OP rhs; \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#endif // KMP_OS_WINDOWS && KMP_ARCH_AARCH64

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, \
                       LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, \
                       LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, \
                       LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

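// Illustrative expansion (not upstream text): on the x86 branch,
// ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 0) generates
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     ... assert and trace from ATOMIC_BEGIN ...
//     KMP_TEST_THEN_ADD32(lhs, +rhs); // atomic fetch-and-add primitive
//   }
//
// On the non-x86 branch the same instantiation first tests the low bits of
// lhs against MASK (0x3 here) and falls back to the 4i lock when unaligned.
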
// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub

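// Usage sketch (hedged, not upstream): for a construct such as
//
//   float x;
//   #pragma omp atomic
//   x += y;
//
// an OpenMP compiler targeting this runtime may emit
// __kmpc_atomic_float4_add(&loc, gtid, &x, y), with loc an ident_t source
// location and gtid the caller's global thread id.
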
// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
// MASK - used for alignment check

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and || */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CRITICAL(= *lhs OP, LCK_ID) \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no direct C counterpart: */
/* MAX, MIN, .EQV., .NEQV. */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator used to check whether any action is still needed
#define MIN_MAX_CRITSECT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (*lhs OP rhs) { /* still need actions? */ \
    *lhs = rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT(OP, 0); \
    return; \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
  }

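// Semantics note (explanatory, not upstream): for "max" the OP argument is
// "<", so the loop above reads "while the stored value is still less than
// rhs, try to CAS rhs in". A worked trace with *lhs == 3, rhs == 7:
//
//   old_value = 3; 3 < 7 holds -> CAS(lhs, 3, 7); on success the loop exits.
//   If another thread raced the value to 9 first, the re-read gives 9,
//   9 < 7 fails, and the update is correctly skipped without any store.
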
// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT(OP, LCK_ID) \
  } \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  } \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
    } else { \
      KMP_CHECK_GTID; \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
    } \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
                 1) // __kmpc_atomic_float10_max
MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
                 1) // __kmpc_atomic_float10_min
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because the complement (~) is needed
// OP ignored for critical sections, ^=~ used instead
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
  }

// ------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

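// Worked identity (explanatory note, not upstream): bitwise, Fortran's
// a .EQV. b is ~(a ^ b), and ~(a ^ b) == a ^ ~b, so the update
// *lhs = (*lhs) .EQV. rhs becomes *lhs ^= ~rhs; hence the "^~" OP token in
// the eqv instantiations below. .NEQV. is a plain xor and reuses
// ATOMIC_CMPXCHG with "^".
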
ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
  }

/* ------------------------------------------------------------------------- */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

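// Usage sketch (hedged illustration, not upstream): the "_rev" entry points
// implement the OpenMP 4.0 form where the shared variable is the right
// operand of a non-commutative operator, e.g.
//
//   #pragma omp atomic
//   x = 10 / x; // x is kmp_int32
//
// which a compiler targeting this runtime may lower to
// __kmpc_atomic_fixed4_div_rev(&loc, gtid, &x, 10), computing
// rhs OP (*lhs) instead of (*lhs) OP rhs.
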
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_REV(TYPE, OP, 0); \
    return; \
  }

#else
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
                                                   TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = (TYPE)(rhs OP old_value); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = (TYPE)(rhs OP old_value); \
    } \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev

ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_rev
ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_rev
ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_rev
ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD

// routines for complex types
ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
                    1) // __kmpc_atomic_cmplx4_sub_rev
ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
                    1) // __kmpc_atomic_cmplx4_div_rev
ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_rev
ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_rev
ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_rev
ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_rev
ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_rev
ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
// End of OpenMP 4.0: x = expr binop x for non-commutative operations.

/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger". */
/* Note: in order to reduce the total number of type combinations, */
/* it is assumed that the compiler converts RHS to the longest floating */
/* type, that is _Quad, before calling any of these routines. */
/* The conversion to _Quad is done by the compiler during the calculation, */
/* and the conversion back to TYPE happens before the assignment, like: */
/* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
/* A performance penalty is expected because of software emulation. */
/* ------------------------------------------------------------------------ */

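// Usage sketch (hedged, not upstream): for
//
//   float x; double y;
//   #pragma omp atomic
//   x = x * y;
//
// a compiler following this scheme widens the RHS and calls e.g.
// __kmpc_atomic_float4_mul_fp(&loc, gtid, &x, (_Quad)y); the runtime
// computes in _Quad and narrows the result back to float on the store.
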
#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
                           GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
  }

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, \
                       LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                               LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

1691// RHS=float8
1692ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1693 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1694ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1695 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1696ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1697 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1698ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1699 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1700ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1701 0) // __kmpc_atomic_fixed4_mul_float8
1702ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1703 0) // __kmpc_atomic_fixed4_div_float8
1704ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1705 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1706ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1707 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1708ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1709 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1710ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1711 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1712ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1713 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1714ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1715 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1716
1717// RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1718// use them)
1719#if KMP_HAVE_QUAD
1720ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1721 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1722ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1723 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1724ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1725 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1726ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1727 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1728ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1729 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1730ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1731 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1732ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1733 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1734ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1735 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1736
1737ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1738 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1739ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1740 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp

ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_add_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_add_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_div_fp

ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp

ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp

ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_add_fp
ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_sub_fp
ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_mul_fp
ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_div_fp

// Reverse operations
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp

ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_sub_rev_fp
ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_div_rev_fp
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#endif // KMP_HAVE_QUAD

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#else
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#endif // USE_CMPXCHG_FIX
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, \
                       LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
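
// Illustrative note (sketch only, kept out of the build): for the cmplx4
// entries below, MASK is 7, so the non-x86 variant above pastes 0x##MASK into
// 0x7 and tests 8-byte alignment of lhs before choosing a path:
#if 0
if (!((kmp_uintptr_t)lhs & 0x7)) {
  /* 8-byte aligned: lock-free 64-bit compare-and-swap update */
} else {
  /* unaligned: serialize through the 8c critical section instead */
}
#endif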

ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8

// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Atomic READ routines

// ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
// fixed types)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *loc) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store_ret" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
// TODO: check if it is still necessary
// Return old value regardless of the result of the "compare & swap" operation
#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    union f_i_union { \
      TYPE f_val; \
      kmp_int##BITS i_val; \
    }; \
    union f_i_union old_value; \
    temp_val = *loc; \
    old_value.f_val = temp_val; \
    old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
        (kmp_int##BITS *)loc, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
    new_value = old_value.f_val; \
    return new_value; \
  }

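// Illustrative sketch (kept out of the build): approximate expansion of
// ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, KMP_ARCH_X86), with the
// GOMP-compat path and the debug assert/trace omitted. The union lets the
// 64-bit float travel through the integer compare_and_store_ret primitive;
// "storing" the current value over itself returns the contents of *loc
// atomically.
#if 0
kmp_real64 __kmpc_atomic_float8_rd(ident_t *id_ref, int gtid,
                                   kmp_real64 *loc) {
  kmp_real64 new_value;
  {
    kmp_real64 KMP_ATOMIC_VOLATILE temp_val;
    union f_i_union {
      kmp_real64 f_val;
      kmp_int64 i_val;
    };
    union f_i_union old_value;
    temp_val = *loc;
    old_value.f_val = temp_val;
    old_value.i_val = KMP_COMPARE_AND_STORE_RET64(
        (kmp_int64 *)loc, *VOLATILE_CAST(kmp_int64 *) & old_value.i_val,
        *VOLATILE_CAST(kmp_int64 *) & old_value.i_val);
    new_value = old_value.f_val;
    return new_value;
  }
}
#endif
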
// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid;
// 1-, 2-byte operands - expect a valid gtid; others - checked before this macro
#define OP_CRITICAL_READ(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  new_value = (*loc); \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_READ(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_READ(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
  return new_value; \
  }
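// A sketch (not compiled) of what ATOMIC_FIXED_READ(fixed4, rd, kmp_int32,
// 32, +, 0) produces, debug assert/trace omitted: the read is performed as an
// atomic fetch-and-add of zero, which returns the current value without
// modifying it.
#if 0
kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid, kmp_int32 *loc) {
  kmp_int32 new_value;
  new_value = KMP_TEST_THEN_ADD32(loc, +0);
  return new_value;
}
#endif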
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_READ(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
  return new_value; \
  }

// ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
// value doesn't work.
// Let's return the read value through the additional parameter.
#if (KMP_OS_WINDOWS)

#define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  (*out) = (*loc); \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_READ_WRK(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
                                         TYPE *loc) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
  }

#endif // KMP_OS_WINDOWS

// ------------------------------------------------------------------------
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
                    KMP_ARCH_X86) // __kmpc_atomic_float4_rd
ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
                    KMP_ARCH_X86) // __kmpc_atomic_float8_rd

// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd

ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
                     1) // __kmpc_atomic_float10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
                     1) // __kmpc_atomic_float16_rd
#endif // KMP_HAVE_QUAD

// Fix for CQ220361 on Windows* OS
#if (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
                         1) // __kmpc_atomic_cmplx4_rd
#else
ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
                     1) // __kmpc_atomic_cmplx4_rd
#endif // (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
                     1) // __kmpc_atomic_cmplx8_rd
ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
                     1) // __kmpc_atomic_cmplx10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
                     1) // __kmpc_atomic_cmplx16_rd
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
                     1) // __kmpc_atomic_float16_a16_rd
ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
                     1) // __kmpc_atomic_cmplx16_a16_rd
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Atomic WRITE routines

#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  KMP_XCHG_FIXED##BITS(lhs, rhs); \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  KMP_XCHG_REAL##BITS(lhs, rhs); \
  }

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_WR(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
  }

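// Sketch (kept out of the build) of OP_CMPXCHG_WR instantiated with
// TYPE = kmp_real64, BITS = 64 - the form used for the 8-byte write on IA-32
// below, where a plain 64-bit XCHG is not available. The loop re-reads *lhs
// until the compare-and-store succeeds, at which point rhs has been stored
// atomically.
#if 0
{
  kmp_real64 KMP_ATOMIC_VOLATILE temp_val;
  kmp_real64 old_value, new_value;
  temp_val = *lhs;
  old_value = temp_val;
  new_value = rhs;
  while (!KMP_COMPARE_AND_STORE_ACQ64(
      (kmp_int64 *)lhs, *VOLATILE_CAST(kmp_int64 *) & old_value,
      *VOLATILE_CAST(kmp_int64 *) & new_value)) {
    temp_val = *lhs;
    old_value = temp_val;
    new_value = rhs;
  }
}
#endif
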
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  OP_CMPXCHG_WR(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
  }
// -------------------------------------------------------------------------

ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#else
ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#else
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
                   1) // __kmpc_atomic_float10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
                   1) // __kmpc_atomic_float16_wr
#endif // KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
                   1) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
                   1) // __kmpc_atomic_cmplx10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
                   1) // __kmpc_atomic_cmplx16_wr
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
                   1) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
                   1) // __kmpc_atomic_cmplx16_a16_wr
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Atomic CAPTURE routines

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
// fixed types)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid;
// 1-, 2-byte operands - expect a valid gtid; others - checked before this macro
#define OP_CRITICAL_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    (*lhs) OP rhs; \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) OP rhs; \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

#define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    (*lhs) = (TYPE)((*lhs)OP rhs); \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) = (TYPE)((*lhs)OP rhs); \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

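// The flag parameter selects between the two OpenMP capture forms:
// flag != 0 captures the value after the update ({ x = x op e; v = x; }),
// flag == 0 captures the value before it ({ v = x; x = x op e; }).
// For example, OP_UPDATE_CRITICAL_CPT(long double, +, 10r) expands roughly
// to the following (sketch only, assuming ATOMIC_LOCK10r names the
// long double lock __kmp_atomic_lock_10r):
#if 0
__kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
if (flag) {
  (*lhs) = (long double)((*lhs) + rhs);
  new_value = (*lhs); // captured after the update
} else {
  new_value = (*lhs); // captured before the update
  (*lhs) = (long double)((*lhs) + rhs);
}
__kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
return new_value;
#endif
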
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = (TYPE)(old_value OP rhs); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = (TYPE)(old_value OP rhs); \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }

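// Illustrative sketch (kept out of the build): approximate expansion of
// ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86)
// defined just below, with the GOMP-compat path and the debug assert/trace
// omitted. Note that the inner block shadows the outer new_value, exactly
// as the macros do.
#if 0
kmp_real32 __kmpc_atomic_float4_add_cpt(ident_t *id_ref, int gtid,
                                        kmp_real32 *lhs, kmp_real32 rhs,
                                        int flag) {
  kmp_real32 new_value;
  (void)new_value;
  {
    kmp_real32 KMP_ATOMIC_VOLATILE temp_val;
    kmp_real32 old_value, new_value;
    temp_val = *lhs;
    old_value = temp_val;
    new_value = (kmp_real32)(old_value + rhs);
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
        *VOLATILE_CAST(kmp_int32 *) & new_value)) {
      temp_val = *lhs;
      old_value = temp_val;
      new_value = (kmp_real32)(old_value + rhs);
    }
    if (flag)
      return new_value; // capture the value after the update
    else
      return old_value; // capture the value seen before the update
  }
}
#endif
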
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE old_value, new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  if (flag) { \
    return old_value OP rhs; \
  } else \
    return old_value; \
  }
// -------------------------------------------------------------------------

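// Sketch (not compiled) of the lock-free body ATOMIC_FIXED_ADD_CPT generates:
// the hardware fetch-and-add returns the value seen before the update, so
// the "captured after" result is reconstructed arithmetically rather than
// re-read. For the fixed4 add_cpt entry below:
#if 0
old_value = KMP_TEST_THEN_ADD32(lhs, +rhs);
if (flag)
  return old_value + rhs; // value after the update
else
  return old_value; // value before the update
#endif
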
ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
                     0) // __kmpc_atomic_fixed4_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
                     0) // __kmpc_atomic_fixed4_sub_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt

ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands' type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
                   0) // __kmpc_atomic_fixed1_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
                   0) // __kmpc_atomic_fixed1_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
                   0) // __kmpc_atomic_fixed1_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
                   0) // __kmpc_atomic_fixed2_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
                   0) // __kmpc_atomic_fixed2_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
                   0) // __kmpc_atomic_fixed2_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
                   0) // __kmpc_atomic_fixed4_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
                   0) // __kmpc_atomic_fixed4_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
                   0) // __kmpc_atomic_fixed4_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG

// CAPTURE routines for mixed types RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
// fixed types)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
  }

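// For example, the first entry below,
// ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
// KMP_ARCH_X86), generates roughly the following (sketch only, GOMP path
// and debug trace omitted): the left operand stays char while the right
// operand arrives as _Quad.
#if 0
char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid, char *lhs,
                                     _Quad rhs, int flag) {
  char new_value;
  // OP_CMPXCHG_CPT(char, 8, +): 8-bit CAS loop computing
  //   new_value = (char)(old_value + rhs);
  // and finally returning flag ? new_value : old_value.
}
#endif
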
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp

ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_add_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_mul_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_fp

#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Routines for C/C++ Reduction operators && and ||

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid;
// 1-, 2-byte operands - expect a valid gtid; others - checked before this macro
#define OP_CRITICAL_L_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    new_value OP rhs; \
    (*lhs) = new_value; \
  } else { \
    new_value = (*lhs); \
    (*lhs) OP rhs; \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_L_CPT(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
                  0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
                  0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt

// -------------------------------------------------------------------------
// Routines for Fortran operators that have no direct C counterpart:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - comparison operator used to check whether any action is needed
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (*lhs OP rhs) { /* still need actions? */ \
    old_value = *lhs; \
    *lhs = rhs; \
    if (flag) \
      new_value = rhs; \
    else \
      new_value = old_value; \
  } else { \
    new_value = *lhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT_CPT(OP, 0); \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    /*TYPE old_value; */ \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
    if (flag) \
      return rhs; \
    else \
      return old_value; \
  }

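// Sketch (not compiled) of the loop above instantiated for a 32-bit float
// max (OP is "<", so the guard "old_value < rhs" means rhs is still the new
// maximum and the exchange must be retried until it lands):
#if 0
temp_val = *lhs;
old_value = temp_val;
while (old_value < rhs && /* still need actions? */
       !KMP_COMPARE_AND_STORE_ACQ32(
           (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
           *VOLATILE_CAST(kmp_int32 *) & rhs)) {
  temp_val = *lhs;
  old_value = temp_val;
}
if (flag)
  return rhs; // rhs became the new extreme; capture it
else
  return old_value; // capture the value seen before the update
#endif
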
// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  } \
  return *lhs; \
  }

#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  (void)new_value; \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  } \
  return *lhs; \
  }

MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
                     0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
                     0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
                     1) // __kmpc_atomic_float10_max_cpt
MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
                     1) // __kmpc_atomic_float10_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
                     1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

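// .EQV. is bitwise equivalence, which has no C assignment form; it is
// expressed through its complement: x .EQV. rhs == x ^ ~rhs. The macro
// therefore receives "^~" as OP, so the CAS loop computes, e.g. for fixed4
// (sketch only):
#if 0
new_value = (kmp_int32)(old_value ^ ~rhs); // bitwise .EQV.
#endif
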
// ------------------------------------------------------------------------

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
  }

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    (*lhs) OP rhs; \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) OP rhs; \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_WRK(OP## =, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
  }
// The end of workaround for cmplx4
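
// A sketch of the resulting cmplx4 entry point (not compiled): instead of
// returning kmp_cmplx32, which miscompiles on Win_32e, the captured value is
// written through the extra *out parameter. A hypothetical caller:
#if 0
void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
                                  kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
kmp_cmplx32 x, e, v; // hypothetical shared variable, expression, capture
__kmpc_atomic_cmplx4_add_cpt(id_ref, gtid, &x, e, &v, 1); // v = (x += e)
#endif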

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
                    1) // __kmpc_atomic_float10_add_cpt
ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_cpt
ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
                    1) // __kmpc_atomic_float10_mul_cpt
ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_cpt
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
                    1) // __kmpc_atomic_float16_add_cpt
ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_cpt
ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
                    1) // __kmpc_atomic_float16_mul_cpt
ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
                    1) // __kmpc_atomic_float16_add_a16_cpt
ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
                    1) // __kmpc_atomic_float16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
                        1) // __kmpc_atomic_cmplx4_add_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
                        1) // __kmpc_atomic_cmplx4_sub_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
                        1) // __kmpc_atomic_cmplx4_mul_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
                        1) // __kmpc_atomic_cmplx4_div_cpt

ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
                    1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
                    1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
                    1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
                    1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0 capture-reverse forms: v = x = expr binop x;
// { v = x; x = expr binop x; } and { x = expr binop x; v = x; }
// for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid;
// 1-, 2-byte operands - expect a valid gtid; others - checked before this macro
#define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    /*temp_val = (*lhs);*/ \
    (*lhs) = (TYPE)((rhs)OP(*lhs)); \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) = (TYPE)((rhs)OP(*lhs)); \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = (TYPE)(rhs OP old_value); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = (TYPE)(rhs OP old_value); \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }

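// The only difference from OP_CMPXCHG_CPT is the operand order:
// new_value = rhs OP old_value instead of old_value OP rhs, matching the
// "x = expr binop x" form. E.g. for fixed4 sub_cpt_rev the loop computes
// (sketch only):
#if 0
new_value = (kmp_int32)(rhs - old_value); // x = expr - x
#endif
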
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }

ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG


// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
  }
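
// Illustration only (not compiled): a sketch of what one instantiation of
// ATOMIC_CRITICAL_CPT_REV expands to, assuming the ATOMIC_BEGIN_CPT and
// OP_CRITICAL_CPT_REV helpers defined earlier in this file. The "reverse"
// forms compute rhs OP lhs rather than lhs OP rhs, and `flag` selects whether
// the new (flag != 0) or old (flag == 0) value is captured and returned.
#if 0
long double __kmpc_atomic_float10_sub_cpt_rev(ident_t *id_ref, int gtid,
                                              long double *lhs,
                                              long double rhs, int flag) {
  long double new_value;
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
  if (flag) {
    new_value = rhs - (*lhs); // reversed operand order
    (*lhs) = new_value;
  } else {
    new_value = (*lhs); // capture the old value first
    (*lhs) = rhs - (*lhs);
  }
  __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
  return new_value;
}
#endif // illustration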

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
                        1) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
                        1) // __kmpc_atomic_float16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
                        1) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
                        1) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    (*lhs) = (rhs)OP(*lhs); \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) = (rhs)OP(*lhs); \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------
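
// Illustration only (not compiled): `flag` selects capture-after versus
// capture-before semantics, and the captured value travels through `out`
// instead of a return value. A sketch of the corresponding source forms,
// assuming cmplx4 operands and the entry-point signature produced by
// ATOMIC_BEGIN_WRK below:
#if 0
void example(ident_t *loc, int gtid, kmp_cmplx32 *x, kmp_cmplx32 expr,
             kmp_cmplx32 *v) {
  // flag != 0:  { x = expr - x; v = x; }  -- capture the new value
  __kmpc_atomic_cmplx4_sub_cpt_rev(loc, gtid, x, expr, v, 1);
  // flag == 0:  { v = x; x = expr - x; }  -- capture the old value
  __kmpc_atomic_cmplx4_sub_cpt_rev(loc, gtid, x, expr, v, 0);
}
#endif // illustration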

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV_WRK(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
                                    GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  }
// The end of workaround for cmplx4

// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
  }

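// Illustrative use (an assumption about compiler lowering): a mixed-type
// atomic capture, where an integer variable is updated with a _Quad
// right-hand side, maps onto the _fp entry points instantiated below.
#if 0
void example(void) {
  char x = 10, v;
  _Quad q = 3.0q;
#pragma omp atomic capture
  {
    x = q - x;
    v = x;
  } // -> __kmpc_atomic_fixed1_sub_cpt_rev_fp(...)
}
#endif // illustration
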
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp

#endif // KMP_HAVE_QUAD

// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

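// For example (illustrative user code): the swap form captures the old value
// of x and then stores the expression, which maps onto the _swp entry points
// defined below.
#if 0
void example(void) {
  kmp_int32 x = 1, v;
#pragma omp atomic capture
  {
    v = x;
    x = 42;
  } // -> v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, 42);
}
#endif // illustration
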
#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  old_value = (*lhs); \
  (*lhs) = rhs; \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return old_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP(0); \
  }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */

#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
  return old_value; \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
  return old_value; \
  }

// ------------------------------------------------------------------------
#define CMPXCHG_SWP(TYPE, BITS) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
    return old_value; \
  }

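// The macro above is the classic compare-and-swap retry loop: read the
// target, attempt the exchange, and retry if another thread changed *lhs in
// between. A portable C++11 analogue of the same idiom (illustrative only;
// the runtime uses its own primitives and bit-casts through integer types):
#if 0
#include <atomic>
template <typename T> T atomic_swap_via_cas(std::atomic<T> &x, T rhs) {
  T old_value = x.load();
  // compare_exchange_weak refreshes old_value with the current contents of x
  // on failure, so the loop body can stay empty.
  while (!x.compare_exchange_weak(old_value, rhs)) {
  }
  return old_value;
}
#endif // illustration
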
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  (void)old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CMPXCHG_SWP(TYPE, BITS) \
  }

ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
                      KMP_ARCH_X86) // __kmpc_atomic_float4_swp

#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
                      KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif // (KMP_ARCH_X86)

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CRITICAL_SWP(LCK_ID) \
  }

// ------------------------------------------------------------------------
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.

#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs, TYPE *out) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP_WRK(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  tmp = (*lhs); \
  (*lhs) = (rhs); \
  (*out) = tmp; \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP_WRK(0); \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  TYPE tmp; \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
  CRITICAL_SWP_WRK(LCK_ID) \
  }
// The end of workaround for cmplx4

ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif // KMP_HAVE_QUAD
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
                    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
                    1) // __kmpc_atomic_cmplx16_a16_swp
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// End of OpenMP 4.0 Capture

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines */

void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8(
        (kmp_int8 *)lhs, *(kmp_int8 *)&old_value, *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 1-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}
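
// Illustration only: the generic entry points take a compiler-supplied
// callback f(result, lhs, rhs) and run it either under the CAS retry loop
// above or under one of the per-size locks. A hypothetical caller:
#if 0
static void byte_xor(void *out, void *a, void *b) { // *out = *a ^ *b
  *(char *)out = *(char *)a ^ *(char *)b;
}
void example(ident_t *loc, int gtid, char *x, char *y) {
  __kmpc_atomic_1(loc, gtid, x, y, byte_xor); // atomically: *x ^= *y
}
#endif // illustration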

void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 2-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}

void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
// Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_4i for all 4-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}

void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_8i for all 8-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}

// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated so that third-party names are not used in pure Intel
// code.
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
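
// Illustration (an assumption about GOMP-compatible lowering): when no
// specialized entry point fits an atomic construct, a compiler can serialize
// the update by bracketing it with these two routines:
#if 0
__kmpc_atomic_start();
x = some_complicated_update(x); // hypothetical user expression
__kmpc_atomic_end();
#endif // illustration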

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// OpenMP 5.1 compare and swap

bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
  return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
}
bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
                              short d) {
  return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
}
bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
                              kmp_int32 d) {
  return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
}
bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
                              kmp_int64 d) {
  return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
}

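// Illustrative mapping (an assumption about compiler lowering of the OpenMP
// 5.1 compare construct): the bool_ variants report whether the swap happened.
#if 0
void example(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e, kmp_int32 d) {
  // #pragma omp atomic compare
  // if (*x == e) { *x = d; }
  (void)__kmpc_atomic_bool_4_cas(loc, gtid, x, e, d);
}
#endif // illustration
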
char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
  return KMP_COMPARE_AND_STORE_RET8(x, e, d);
}
short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
                              short d) {
  return KMP_COMPARE_AND_STORE_RET16(x, e, d);
}
kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
                                  kmp_int32 e, kmp_int32 d) {
  return KMP_COMPARE_AND_STORE_RET32(x, e, d);
}
kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
                                  kmp_int64 e, kmp_int64 d) {
  return KMP_COMPARE_AND_STORE_RET64(x, e, d);
}

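// Illustrative use (hypothetical caller): the val_ variants return the old
// value of *x, so success can be tested with (old == e).
#if 0
void example(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e, kmp_int32 d) {
  kmp_int32 old = __kmpc_atomic_val_4_cas(loc, gtid, x, e, d);
  bool swapped = (old == e);
  (void)swapped;
}
#endif // illustration
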
bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
                                  char d, char *pv) {
  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
                                  short d, short *pv) {
  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
                                  kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}
bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
                                  kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
  if (old == e)
    return true;
  KMP_ASSERT(pv != NULL);
  *pv = old;
  return false;
}

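// Illustrative mapping (an assumption about lowering of compare + capture):
// on failure, the routines above store the observed value through pv.
#if 0
void example(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e, kmp_int32 d) {
  kmp_int32 v = 0;
  // #pragma omp atomic compare capture
  // { r = (*x == e); if (r) { *x = d; } else { v = *x; } }
  bool r = __kmpc_atomic_bool_4_cas_cpt(loc, gtid, x, e, d, &v);
  (void)r;
}
#endif // illustration
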
char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
                                 char d, char *pv) {
  char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}
short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
                                  short d, short *pv) {
  short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}
kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
                                      kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
  kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}
kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
                                      kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
  kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
  KMP_ASSERT(pv != NULL);
  *pv = old == e ? d : old;
  return old;
}

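// Illustrative use (hypothetical caller): *pv receives the value of *x after
// the operation -- d on a successful swap, otherwise the unchanged old value
// -- while the old value itself is returned.
#if 0
void example(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e, kmp_int32 d) {
  kmp_int32 v;
  kmp_int32 old = __kmpc_atomic_val_4_cas_cpt(loc, gtid, x, e, d, &v);
  // old == e implies v == d (swap happened); otherwise v == old.
  (void)old;
}
#endif // illustration
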
// End OpenMP 5.1 compare + capture
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

// end of file