Intel® OpenMP* Runtime Library
kmp_atomic.c
1 /*
2  * kmp_atomic.c -- ATOMIC implementation routines
3  */
4 
5 /* <copyright>
6  Copyright (c) 1997-2015 Intel Corporation. All Rights Reserved.
7 
8  Redistribution and use in source and binary forms, with or without
9  modification, are permitted provided that the following conditions
10  are met:
11 
12  * Redistributions of source code must retain the above copyright
13  notice, this list of conditions and the following disclaimer.
14  * Redistributions in binary form must reproduce the above copyright
15  notice, this list of conditions and the following disclaimer in the
16  documentation and/or other materials provided with the distribution.
17  * Neither the name of Intel Corporation nor the names of its
18  contributors may be used to endorse or promote products derived
19  from this software without specific prior written permission.
20 
21  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 </copyright> */
34 
35 #include "kmp_atomic.h"
36 #include "kmp.h" // TRUE, asm routines prototypes
37 
38 typedef unsigned char uchar;
39 typedef unsigned short ushort;
40 
558 /*
559  * Global vars
560  */
561 
562 #ifndef KMP_GOMP_COMPAT
563 int __kmp_atomic_mode = 1; // Intel perf
564 #else
565 int __kmp_atomic_mode = 2; // GOMP compatibility
566 #endif /* KMP_GOMP_COMPAT */
567 
568 KMP_ALIGN(128)
569 
570 kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */
571 kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */
572 kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */
573 kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */
574 kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */
575 kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */
576 kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */
577 kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for float complex (8-byte) data type */
578 kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */
579 kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */
580 kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
581 kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
582 kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
583 
584 
585 /*
586  2007-03-02:
587  Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
588  bug on *_32 and *_32e. This is just a temporary workaround for the problem.
589  It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
590  routines in assembler language.
591 */
592 #define KMP_ATOMIC_VOLATILE volatile
593 
594 #if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD
595 
596  static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; };
597  static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; };
598  static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; };
599  static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; };
600  static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
601  static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }
602 
603  static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; };
604  static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; };
605  static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; };
606  static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; };
607  static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
608  static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }
609 
610  static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; };
611  static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; };
612  static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; };
613  static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; };
614 
615  static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; };
616  static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; };
617  static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; };
618  static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; };
619 
620 #endif
621 
622 /* ------------------------------------------------------------------------ */
623 /* ATOMIC implementation routines */
624 /* one routine for each operation and operand type */
625 /* ------------------------------------------------------------------------ */
626 
627 // All routine declarations look like
628 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
629 // ------------------------------------------------------------------------
630 
631 #define KMP_CHECK_GTID \
632  if ( gtid == KMP_GTID_UNKNOWN ) { \
633  gtid = __kmp_entry_gtid(); \
634  } // check and get gtid when needed
635 
636 // Beginning of a definition (provides name, parameters, debug trace)
637 // TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned fixed types)
638 // OP_ID - operation identifier (add, sub, mul, ...)
639 // TYPE - operands' type
640 #define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
641 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
642 { \
643  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
644  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
645 
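// For illustration only (not part of the original source): instantiated with a
// concrete type, ATOMIC_BEGIN(fixed4, add, kmp_int32, void) expands approximately
// to the function header below, which each operation macro then completes with an
// operation body and a closing brace:
//
//   void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid,
//                                  kmp_int32 * lhs, kmp_int32 rhs )
//   {
//       KMP_DEBUG_ASSERT( __kmp_init_serial );
//       KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid ));
//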
646 // ------------------------------------------------------------------------
647 // Lock variables used for critical sections for various size operands
648 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
649 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
650 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
651 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
652 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
653 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
654 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
655 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
656 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
657 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
658 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
659 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
660 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
661 
662 // ------------------------------------------------------------------------
663 // Operation on *lhs, rhs bound by critical section
664 // OP - operator (it's supposed to contain an assignment)
665 // LCK_ID - lock identifier
666 // Note: gtid is not checked here because it should already be valid:
667 // 1- and 2-byte routines expect a valid gtid; other sizes must check it before using this macro
668 #define OP_CRITICAL(OP,LCK_ID) \
669  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
670  \
671  (*lhs) OP (rhs); \
672  \
673  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
674 
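// For illustration only: OP_CRITICAL(+=, 4i) expands approximately to the
// lock/update/unlock sequence below (ATOMIC_LOCK4i resolves to __kmp_atomic_lock_4i):
//
//   __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
//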
675 // ------------------------------------------------------------------------
676 // For GNU compatibility, we may need to use a critical section,
677 // even though it is not required by the ISA.
678 //
679 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
680 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
681 // critical section. On Intel(R) 64, all atomic operations are done with fetch
682 // and add or compare and exchange. Therefore, the FLAG parameter to this
683 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
684 // require a critical section, where we predict that they will be implemented
685 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
686 //
687 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
688 // the FLAG parameter should always be 1. If we know that we will be using
689 // a critical section, then we want to make certain that we use the generic
690 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
691 // locks that are specialized based upon the size or type of the data.
692 //
693 // If FLAG is 0, then we are relying on dead code elimination by the build
694 // compiler to get rid of the useless block of code, and save a needless
695 // branch at runtime.
696 //
697 
698 #ifdef KMP_GOMP_COMPAT
699 # define OP_GOMP_CRITICAL(OP,FLAG) \
700  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
701  KMP_CHECK_GTID; \
702  OP_CRITICAL( OP, 0 ); \
703  return; \
704  }
705 # else
706 # define OP_GOMP_CRITICAL(OP,FLAG)
707 #endif /* KMP_GOMP_COMPAT */
708 
709 #if KMP_MIC
710 # define KMP_DO_PAUSE _mm_delay_32( 1 )
711 #else
712 # define KMP_DO_PAUSE KMP_CPU_PAUSE()
713 #endif /* KMP_MIC */
714 
715 // ------------------------------------------------------------------------
716 // Operation on *lhs, rhs using "compare_and_store" routine
717 // TYPE - operands' type
718 // BITS - size in bits, used to distinguish low level calls
719 // OP - operator
720 #define OP_CMPXCHG(TYPE,BITS,OP) \
721  { \
722  TYPE old_value, new_value; \
723  old_value = *(TYPE volatile *)lhs; \
724  new_value = old_value OP rhs; \
725  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
726  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
727  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
728  { \
729  KMP_DO_PAUSE; \
730  \
731  old_value = *(TYPE volatile *)lhs; \
732  new_value = old_value OP rhs; \
733  } \
734  }
735 
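// For illustration only: OP_CMPXCHG(kmp_real32, 32, +) implements the update as a
// classic compare-and-swap retry loop, approximately:
//
//   kmp_real32 old_value = *(kmp_real32 volatile *)lhs;
//   kmp_real32 new_value = old_value + rhs;
//   while ( ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
//               *VOLATILE_CAST(kmp_int32 *) &old_value,
//               *VOLATILE_CAST(kmp_int32 *) &new_value ) )
//   {   // another thread changed *lhs between the read and the CAS; re-read and retry
//       KMP_DO_PAUSE;
//       old_value = *(kmp_real32 volatile *)lhs;
//       new_value = old_value + rhs;
//   }
//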
736 #if USE_CMPXCHG_FIX
737 // 2007-06-25:
738 // workaround for C78287 (complex(kind=4) data type)
739 // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
740 // Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro.
741 // This is a problem of the compiler.
742 // Related tracker is C76005, targeted to 11.0.
743 // I verified the asm of the workaround.
744 #define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
745  { \
746  struct _sss { \
747  TYPE cmp; \
748  kmp_int##BITS *vvv; \
749  }; \
750  struct _sss old_value, new_value; \
751  old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \
752  new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \
753  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
754  new_value.cmp = old_value.cmp OP rhs; \
755  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
756  *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
757  *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \
758  { \
759  KMP_DO_PAUSE; \
760  \
761  *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \
762  new_value.cmp = old_value.cmp OP rhs; \
763  } \
764  }
765 // end of the first part of the workaround for C78287
766 #endif // USE_CMPXCHG_FIX
767 
768 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
769 
770 // ------------------------------------------------------------------------
771 // X86 or X86_64: no alignment problems ====================================
772 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
773 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
774  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
775  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
776  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
777 }
778 // -------------------------------------------------------------------------
779 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
780 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
781  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
782  OP_CMPXCHG(TYPE,BITS,OP) \
783 }
784 #if USE_CMPXCHG_FIX
785 // -------------------------------------------------------------------------
786 // workaround for C78287 (complex(kind=4) data type)
787 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
788 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
789  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
790  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
791 }
792 // end of the second part of the workaround for C78287
793 #endif
794 
795 #else
796 // -------------------------------------------------------------------------
797 // Code for other architectures that don't handle unaligned accesses.
798 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
799 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
800  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
801  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
802  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
803  KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
804  } else { \
805  KMP_CHECK_GTID; \
806  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
807  } \
808 }
809 // -------------------------------------------------------------------------
810 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
811 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
812  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
813  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
814  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
815  } else { \
816  KMP_CHECK_GTID; \
817  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
818  } \
819 }
820 #if USE_CMPXCHG_FIX
821 // -------------------------------------------------------------------------
822 // workaround for C78287 (complex(kind=4) data type)
823 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
824 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
825  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
826  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
827  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
828  } else { \
829  KMP_CHECK_GTID; \
830  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
831  } \
832 }
833 // end of the second part of the workaround for C78287
834 #endif // USE_CMPXCHG_FIX
835 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
836 
837 // Routines for ATOMIC 4-byte operands addition and subtraction
838 ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add
839 ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub
840 
841 ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add
842 ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub
843 
844 // Routines for ATOMIC 8-byte operands addition and subtraction
845 ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add
846 ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub
847 
848 ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add
849 ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub
850 
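// For illustration only: on IA-32/Intel(R) 64, ATOMIC_FIXED_ADD( fixed4, add, ... )
// above expands approximately to a single locked fetch-and-add (the GOMP-compat
// branch is dead code here because GOMP_FLAG is 0):
//
//   void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs )
//   {
//       KMP_DEBUG_ASSERT( __kmp_init_serial );
//       KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid ));
//       KMP_TEST_THEN_ADD32( lhs, + rhs );
//   }
//
// A compiler targeting this runtime can then lower a directive such as
//
//   int x;            // shared
//   #pragma omp atomic
//   x += 5;
//
// into a call like __kmpc_atomic_fixed4_add( &loc, gtid, &x, 5 ), where "loc" and
// "gtid" stand for the source-location descriptor and global thread id the compiler
// has at hand (illustrative names, not taken from this file).
//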
851 // ------------------------------------------------------------------------
852 // Entry definitions for integer and real operands
853 // TYPE_ID - operands' type and size (fixed4, float4)
854 // OP_ID - operation identifier (add, sub, mul, ...)
855 // TYPE - operand type
856 // BITS - size in bits, used to distinguish low level calls
857 // OP - operator (used in critical section)
858 // LCK_ID - lock identifier, used to possibly distinguish lock variable
859 // MASK - used for alignment check
860 
861 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
862 // ------------------------------------------------------------------------
863 // Routines for ATOMIC integer operands, other operators
864 // ------------------------------------------------------------------------
865 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
866 ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add
867 ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb
868 ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div
869 ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div
870 ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul
871 ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb
872 ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl
873 ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr
874 ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr
875 ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub
876 ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor
877 ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add
878 ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb
879 ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div
880 ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div
881 ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul
882 ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb
883 ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl
884 ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr
885 ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr
886 ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub
887 ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor
888 ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb
889 ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div
890 ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div
891 ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul
892 ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb
893 ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl
894 ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr
895 ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr
896 ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor
897 ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb
898 ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div
899 ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div
900 ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul
901 ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb
902 ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl
903 ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr
904 ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr
905 ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor
906 ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div
907 ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul
908 ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div
909 ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul
910 // TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
911 
912 
913 /* ------------------------------------------------------------------------ */
914 /* Routines for C/C++ Reduction operators && and || */
915 /* ------------------------------------------------------------------------ */
916 
917 // ------------------------------------------------------------------------
918 // Need separate macros for &&, || because there is no combined assignment
919 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
920 #define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
921 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
922  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
923  OP_CRITICAL( = *lhs OP, LCK_ID ) \
924 }
925 
926 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
927 
928 // ------------------------------------------------------------------------
929 // X86 or X86_64: no alignment problems ===================================
930 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
931 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
932  OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \
933  OP_CMPXCHG(TYPE,BITS,OP) \
934 }
935 
936 #else
937 // ------------------------------------------------------------------------
938 // Code for other architectures that don't handle unaligned accesses.
939 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
940 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
941  OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \
942  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
943  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
944  } else { \
945  KMP_CHECK_GTID; \
946  OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \
947  } \
948 }
949 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
950 
951 ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl
952 ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl
953 ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl
954 ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl
955 ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl
956 ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl
957 ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl
958 ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl
959 
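// For illustration only: since C/C++ has no "&&=" / "||=" operators, the macros above
// build the whole expression; e.g. __kmpc_atomic_fixed4_andl atomically performs the
// equivalent of
//
//   *lhs = *lhs && rhs;
//
// via the compare-and-swap loop (the GOMP-compat / critical-section fallbacks pass
// "= *lhs OP" to form the same expression).
//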
960 
961 /* ------------------------------------------------------------------------- */
962 /* Routines for Fortran operators that have no C counterpart: */
963 /* MAX, MIN, .EQV., .NEQV. */
964 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
965 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
966 /* ------------------------------------------------------------------------- */
967 
968 // -------------------------------------------------------------------------
969 // MIN and MAX need separate macros
970 // OP - operator used to check whether any action is still needed
971 #define MIN_MAX_CRITSECT(OP,LCK_ID) \
972  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
973  \
974  if ( *lhs OP rhs ) { /* still need actions? */ \
975  *lhs = rhs; \
976  } \
977  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
978 
979 // -------------------------------------------------------------------------
980 #ifdef KMP_GOMP_COMPAT
981 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \
982  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
983  KMP_CHECK_GTID; \
984  MIN_MAX_CRITSECT( OP, 0 ); \
985  return; \
986  }
987 #else
988 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
989 #endif /* KMP_GOMP_COMPAT */
990 
991 // -------------------------------------------------------------------------
992 #define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
993  { \
994  TYPE KMP_ATOMIC_VOLATILE temp_val; \
995  TYPE old_value; \
996  temp_val = *lhs; \
997  old_value = temp_val; \
998  while ( old_value OP rhs && /* still need actions? */ \
999  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1000  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1001  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
1002  { \
1003  KMP_CPU_PAUSE(); \
1004  temp_val = *lhs; \
1005  old_value = temp_val; \
1006  } \
1007  }
1008 
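// For illustration only: a "max" routine instantiates OP as '<', so the loop above
// keeps trying to store rhs only while the current value is still smaller; for
// MIN_MAX_CMPXCHG(kmp_int32, 32, <) this is approximately:
//
//   kmp_int32 old_value = *lhs;
//   while ( old_value < rhs &&                      /* still need to update? */
//           ! KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) lhs,
//               *VOLATILE_CAST(kmp_int32 *) &old_value,
//               *VOLATILE_CAST(kmp_int32 *) &rhs ) )
//   {
//       KMP_CPU_PAUSE();
//       old_value = *lhs;
//   }
//
// The enclosing MIN_MAX_* macros below additionally check "if ( *lhs OP rhs )" first,
// so the update path is entered only when an update may actually be needed.
//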
1009 // -------------------------------------------------------------------------
1010 // 1-byte, 2-byte operands - use critical section
1011 #define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1012 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1013  if ( *lhs OP rhs ) { /* need actions? */ \
1014  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1015  MIN_MAX_CRITSECT(OP,LCK_ID) \
1016  } \
1017 }
1018 
1019 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1020 
1021 // -------------------------------------------------------------------------
1022 // X86 or X86_64: no alignment problems ====================================
1023 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1024 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1025  if ( *lhs OP rhs ) { \
1026  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1027  MIN_MAX_CMPXCHG(TYPE,BITS,OP) \
1028  } \
1029 }
1030 
1031 #else
1032 // -------------------------------------------------------------------------
1033 // Code for other architectures that don't handle unaligned accesses.
1034 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1035 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1036  if ( *lhs OP rhs ) { \
1037  GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \
1038  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1039  MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1040  } else { \
1041  KMP_CHECK_GTID; \
1042  MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \
1043  } \
1044  } \
1045 }
1046 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1047 
1048 MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
1049 MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
1050 MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
1051 MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
1052 MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max
1053 MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min
1054 MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
1055 MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
1056 MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
1057 MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
1058 MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
1059 MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
1060 #if KMP_HAVE_QUAD
1061 MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max
1062 MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min
1063 #if ( KMP_ARCH_X86 )
1064  MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16
1065  MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16
1066 #endif
1067 #endif
1068 // ------------------------------------------------------------------------
1069 // Need separate macros for .EQV. because a complement (~) is required
1070 // OP is ignored for critical sections; ^=~ is used instead
1071 #define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1072 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1073  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1074  OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \
1075 }
1076 
1077 // ------------------------------------------------------------------------
1078 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1079 // ------------------------------------------------------------------------
1080 // X86 or X86_64: no alignment problems ===================================
1081 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1082 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1083  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \
1084  OP_CMPXCHG(TYPE,BITS,OP) \
1085 }
1086 // ------------------------------------------------------------------------
1087 #else
1088 // ------------------------------------------------------------------------
1089 // Code for other architectures that don't handle unaligned accesses.
1090 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1091 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1092  OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \
1093  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1094  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1095  } else { \
1096  KMP_CHECK_GTID; \
1097  OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \
1098  } \
1099 }
1100 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1101 
1102 ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
1103 ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
1104 ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
1105 ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
1106 ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
1107 ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
1108 ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
1109 ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
1110 
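// For illustration only: .NEQV. maps onto plain XOR, while .EQV. is XOR with the
// complemented operand; e.g. __kmpc_atomic_fixed4_eqv atomically performs the
// equivalent of
//
//   *lhs = *lhs ^ ~rhs;
//
// which is why the critical-section fallback above passes "^=~" as the combined
// assignment while the cmpxchg path uses the "^~" operator token.
//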
1111 
1112 // ------------------------------------------------------------------------
1113 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1114 // TYPE_ID, OP_ID, TYPE - detailed above
1115 // OP - operator
1116 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1117 #define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1118 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1119  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1120  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1121 }
1122 
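// For illustration only: ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 )
// below produces approximately:
//
//   void __kmpc_atomic_cmplx8_add( ident_t *id_ref, int gtid,
//                                  kmp_cmplx64 * lhs, kmp_cmplx64 rhs )
//   {
//       KMP_DEBUG_ASSERT( __kmp_init_serial );
//       KA_TRACE(100, ("__kmpc_atomic_cmplx8_add: T#%d\n", gtid ));
//       // GOMP compat: if __kmp_atomic_mode == 2, update under __kmp_atomic_lock and return
//       __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
//       (*lhs) += (rhs);
//       __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
//   }
//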
1123 /* ------------------------------------------------------------------------- */
1124 // routines for long double type
1125 ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add
1126 ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub
1127 ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul
1128 ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div
1129 #if KMP_HAVE_QUAD
1130 // routines for _Quad type
1131 ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add
1132 ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub
1133 ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul
1134 ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div
1135 #if ( KMP_ARCH_X86 )
1136  ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16
1137  ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16
1138  ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16
1139  ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16
1140 #endif
1141 #endif
1142 // routines for complex types
1143 
1144 #if USE_CMPXCHG_FIX
1145 // workaround for C78287 (complex(kind=4) data type)
1146 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add
1147 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub
1148 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul
1149 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div
1150 // end of the workaround for C78287
1151 #else
1152 ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add
1153 ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub
1154 ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul
1155 ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div
1156 #endif // USE_CMPXCHG_FIX
1157 
1158 ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add
1159 ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub
1160 ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul
1161 ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div
1162 ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add
1163 ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub
1164 ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul
1165 ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div
1166 #if KMP_HAVE_QUAD
1167 ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add
1168 ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub
1169 ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul
1170 ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div
1171 #if ( KMP_ARCH_X86 )
1172  ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16
1173  ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16
1174  ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16
1175  ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16
1176 #endif
1177 #endif
1178 
1179 #if OMP_40_ENABLED
1180 
1181 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1182 // Supported only on IA-32 architecture and Intel(R) 64
1183 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1184 
1185 // ------------------------------------------------------------------------
1186 // Operation on *lhs, rhs bound by critical section
1187 // OP - operator (it's supposed to contain an assignment)
1188 // LCK_ID - lock identifier
1189 // Note: gtid is not checked here because it should already be valid:
1190 // 1- and 2-byte routines expect a valid gtid; other sizes must check it before using this macro
1191 #define OP_CRITICAL_REV(OP,LCK_ID) \
1192  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1193  \
1194  (*lhs) = (rhs) OP (*lhs); \
1195  \
1196  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1197 
1198 #ifdef KMP_GOMP_COMPAT
1199 #define OP_GOMP_CRITICAL_REV(OP,FLAG) \
1200  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1201  KMP_CHECK_GTID; \
1202  OP_CRITICAL_REV( OP, 0 ); \
1203  return; \
1204  }
1205 #else
1206 #define OP_GOMP_CRITICAL_REV(OP,FLAG)
1207 #endif /* KMP_GOMP_COMPAT */
1208 
1209 
1210 // Beginning of a definition (provides name, parameters, debug trace)
1211 // TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned fixed types)
1212 // OP_ID - operation identifier (add, sub, mul, ...)
1213 // TYPE - operands' type
1214 #define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1215 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
1216 { \
1217  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1218  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
1219 
1220 // ------------------------------------------------------------------------
1221 // Operation on *lhs, rhs using "compare_and_store" routine
1222 // TYPE - operands' type
1223 // BITS - size in bits, used to distinguish low level calls
1224 // OP - operator
1225 // Note: temp_val introduced in order to force the compiler to read
1226 // *lhs only once (w/o it the compiler reads *lhs twice)
1227 #define OP_CMPXCHG_REV(TYPE,BITS,OP) \
1228  { \
1229  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1230  TYPE old_value, new_value; \
1231  temp_val = *lhs; \
1232  old_value = temp_val; \
1233  new_value = rhs OP old_value; \
1234  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1235  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1236  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1237  { \
1238  KMP_DO_PAUSE; \
1239  \
1240  temp_val = *lhs; \
1241  old_value = temp_val; \
1242  new_value = rhs OP old_value; \
1243  } \
1244  }
1245 
1246 // -------------------------------------------------------------------------
1247 #define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \
1248 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1249  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1250  OP_CMPXCHG_REV(TYPE,BITS,OP) \
1251 }
1252 
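// For illustration only: the "_rev" entry points compute rhs OP *lhs instead of
// *lhs OP rhs.  E.g. __kmpc_atomic_fixed4_sub_rev (instantiated below) atomically
// performs the equivalent of
//
//   *lhs = rhs - *lhs;
//
// using the same compare-and-store retry loop as OP_CMPXCHG, with the operands
// swapped when computing new_value.
//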
1253 // ------------------------------------------------------------------------
1254 // Entry definitions for integer and real operands
1255 // TYPE_ID - operands' type and size (fixed4, float4)
1256 // OP_ID - operation identifier (add, sub, mul, ...)
1257 // TYPE - operand type
1258 // BITS - size in bits, used to distinguish low level calls
1259 // OP - operator (used in critical section)
1260 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1261 
1262 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1263 // ------------------------------------------------------------------------
1264 // Routines for ATOMIC integer operands, other operators
1265 // ------------------------------------------------------------------------
1266 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1267 ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev
1268 ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev
1269 ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev
1270 ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev
1271 ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev
1272 ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev
1273 
1274 ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev
1275 ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev
1276 ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev
1277 ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev
1278 ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev
1279 ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev
1280 
1281 ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev
1282 ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev
1283 ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev
1284 ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev
1285 ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev
1286 ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev
1287 
1288 ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev
1289 ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev
1290 ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev
1291 ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev
1292 ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev
1293 ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev
1294 
1295 ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev
1296 ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev
1297 
1298 ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev
1299 ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev
1300 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1301 
1302 // ------------------------------------------------------------------------
1303 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1304 // TYPE_ID, OP_ID, TYPE - detailed above
1305 // OP - operator
1306 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1307 #define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1308 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \
1309  OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \
1310  OP_CRITICAL_REV(OP,LCK_ID) \
1311 }
1312 
1313 /* ------------------------------------------------------------------------- */
1314 // routines for long double type
1315 ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev
1316 ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev
1317 #if KMP_HAVE_QUAD
1318 // routines for _Quad type
1319 ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev
1320 ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev
1321 #if ( KMP_ARCH_X86 )
1322  ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev
1323  ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev
1324 #endif
1325 #endif
1326 
1327 // routines for complex types
1328 ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev
1329 ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev
1330 ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev
1331 ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev
1332 ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev
1333 ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev
1334 #if KMP_HAVE_QUAD
1335 ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev
1336 ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev
1337 #if ( KMP_ARCH_X86 )
1338  ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev
1339  ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev
1340 #endif
1341 #endif
1342 
1343 
1344 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
1345 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1346 
1347 #endif //OMP_40_ENABLED
1348 
1349 
1350 /* ------------------------------------------------------------------------ */
1351 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1352 /* Note: in order to reduce the total number of type combinations, */
1353 /* it is assumed that the compiler converts RHS to the longest floating type, */
1354 /* that is _Quad, before calling any of these routines. */
1355 /* The conversion to _Quad is done by the compiler during the calculation, */
1356 /* and the conversion back to TYPE happens before the assignment, like: */
1357 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1358 /* A performance penalty is expected because of software (_Quad) emulation. */
1359 /* ------------------------------------------------------------------------ */
1360 
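// For illustration only: with the macros below, __kmpc_atomic_fixed4_mul_float8
// atomically performs the equivalent of
//
//   *lhs = (kmp_int32)( (kmp_real64)(*lhs) * rhs );
//
// i.e. the arithmetic happens in the wider RHS type and the result is converted back
// to the LHS type on assignment; the "_fp" entries do the same with _Quad as the RHS type.
//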
1361 #define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1362 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
1363 { \
1364  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1365  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
1366 
1367 // -------------------------------------------------------------------------
1368 #define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \
1369 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1370  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \
1371  OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \
1372 }
1373 
1374 // -------------------------------------------------------------------------
1375 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1376 // -------------------------------------------------------------------------
1377 // X86 or X86_64: no alignment problems ====================================
1378 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1379 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1380  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1381  OP_CMPXCHG(TYPE,BITS,OP) \
1382 }
1383 // -------------------------------------------------------------------------
1384 #else
1385 // ------------------------------------------------------------------------
1386 // Code for other architectures that don't handle unaligned accesses.
1387 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1388 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1389  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1390  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1391  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1392  } else { \
1393  KMP_CHECK_GTID; \
1394  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1395  } \
1396 }
1397 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1398 
1399 // RHS=float8
1400 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
1401 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
1402 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
1403 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
1404 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8
1405 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8
1406 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
1407 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
1408 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
1409 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
1410 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
1411 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
1412 
1413 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
1414 #if KMP_HAVE_QUAD
1415 ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
1416 ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
1417 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
1418 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
1419 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
1420 
1421 ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
1422 ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
1423 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
1424 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
1425 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
1426 
1427 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp
1428 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp
1429 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp
1430 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp
1431 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp
1432 
1433 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
1434 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
1435 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
1436 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
1437 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
1438 
1439 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
1440 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
1441 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
1442 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
1443 
1444 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
1445 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
1446 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
1447 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
1448 
1449 ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp
1450 ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp
1451 ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp
1452 ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp
1453 #endif
1454 
1455 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1456 // ------------------------------------------------------------------------
1457 // X86 or X86_64: no alignment problems ====================================
1458 #if USE_CMPXCHG_FIX
1459 // workaround for C78287 (complex(kind=4) data type)
1460 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1461 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1462  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1463  OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \
1464 }
1465 // end of the second part of the workaround for C78287
1466 #else
1467 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1468 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1469  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1470  OP_CMPXCHG(TYPE,BITS,OP) \
1471 }
1472 #endif // USE_CMPXCHG_FIX
1473 #else
1474 // ------------------------------------------------------------------------
1475 // Code for other architectures that don't handle unaligned accesses.
1476 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1477 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \
1478  OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \
1479  if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \
1480  OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \
1481  } else { \
1482  KMP_CHECK_GTID; \
1483  OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \
1484  } \
1485 }
1486 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1487 
1488 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
1489 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
1490 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
1491 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
1492 
1493 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1494 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1495 
1497 // ------------------------------------------------------------------------
1498 // Atomic READ routines
1499 // ------------------------------------------------------------------------
1500 
1501 // ------------------------------------------------------------------------
1502 // Beginning of a definition (provides name, parameters, debug trace)
1503 // TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned fixed types)
1504 // OP_ID - operation identifier (add, sub, mul, ...)
1505 // TYPE - operands' type
1506 #define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1507 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
1508 { \
1509  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1510  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1511 
1512 // ------------------------------------------------------------------------
1513 // Atomic read of *loc using "compare_and_store_ret" routine
1514 // TYPE - operands' type
1515 // BITS - size in bits, used to distinguish low level calls
1516 // OP - operator
1517 // Note: temp_val introduced in order to force the compiler to read
1518 // *loc only once (w/o it the compiler reads *loc twice)
1519 // TODO: check if it is still necessary
1520 // Return old value regardless of the result of the "compare & swap" operation
1521 
1522 #define OP_CMPXCHG_READ(TYPE,BITS,OP) \
1523  { \
1524  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1525  union f_i_union { \
1526  TYPE f_val; \
1527  kmp_int##BITS i_val; \
1528  }; \
1529  union f_i_union old_value; \
1530  temp_val = *loc; \
1531  old_value.f_val = temp_val; \
1532  old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \
1533  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \
1534  *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
1535  new_value = old_value.f_val; \
1536  return new_value; \
1537  }
1538 
1539 // -------------------------------------------------------------------------
1540 // Operation on *lhs, rhs bound by critical section
1541 // OP - operator (it's supposed to contain an assignment)
1542 // LCK_ID - lock identifier
1543 // Note: don't check gtid as it should always be valid
1544 // 1, 2-byte - expect valid parameter, other - check before this macro
1545 #define OP_CRITICAL_READ(OP,LCK_ID) \
1546  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1547  \
1548  new_value = (*loc); \
1549  \
1550  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1551 
1552 // -------------------------------------------------------------------------
1553 #ifdef KMP_GOMP_COMPAT
1554 #define OP_GOMP_CRITICAL_READ(OP,FLAG) \
1555  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1556  KMP_CHECK_GTID; \
1557  OP_CRITICAL_READ( OP, 0 ); \
1558  return new_value; \
1559  }
1560 #else
1561 #define OP_GOMP_CRITICAL_READ(OP,FLAG)
1562 #endif /* KMP_GOMP_COMPAT */
1563 
1564 // -------------------------------------------------------------------------
1565 #define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1566 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1567  TYPE new_value; \
1568  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1569  new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \
1570  return new_value; \
1571 }
1572 // -------------------------------------------------------------------------
1573 #define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1574 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1575  TYPE new_value; \
1576  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \
1577  OP_CMPXCHG_READ(TYPE,BITS,OP) \
1578 }
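
// Illustrative expansion (hand-written sketch, not generated code; the trace,
// assert and GOMP-compat paths are omitted): for
//     ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 )
// the macros above produce roughly the following routine, which obtains an
// atomic 64-bit read by compare-and-swapping the location with its own current
// value and returning whatever the CAS found there:
//
//     kmp_real64 __kmpc_atomic_float8_rd( ident_t *id_ref, int gtid, kmp_real64 * loc )
//     {
//         kmp_real64 new_value;
//         union f_i_union { kmp_real64 f_val; kmp_int64 i_val; };
//         union f_i_union old_value;
//         old_value.f_val = *loc;                              // snapshot
//         old_value.i_val = KMP_COMPARE_AND_STORE_RET64(
//                               (kmp_int64 *) loc,
//                               old_value.i_val,               // expected
//                               old_value.i_val );             // desired: same bits
//         new_value = old_value.f_val;                         // value the CAS observed
//         return new_value;
//     }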
1579 // ------------------------------------------------------------------------
1580 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1581 // TYPE_ID, OP_ID, TYPE - detailed above
1582 // OP - operator
1583 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1584 #define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1585 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \
1586  TYPE new_value; \
1587  OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \
1588  OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \
1589  return new_value; \
1590 }
1591 
1592 // ------------------------------------------------------------------------
1593 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work.
1594 // Let's return the read value through the additional parameter.
1595 
1596 #if ( KMP_OS_WINDOWS )
1597 
1598 #define OP_CRITICAL_READ_WRK(OP,LCK_ID) \
1599  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1600  \
1601  (*out) = (*loc); \
1602  \
1603  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1604 // ------------------------------------------------------------------------
1605 #ifdef KMP_GOMP_COMPAT
1606 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \
1607  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1608  KMP_CHECK_GTID; \
1609  OP_CRITICAL_READ_WRK( OP, 0 ); \
1610  }
1611 #else
1612 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
1613 #endif /* KMP_GOMP_COMPAT */
1614 // ------------------------------------------------------------------------
1615 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1616 void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
1617 { \
1618  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1619  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1620 
1621 // ------------------------------------------------------------------------
1622 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1623 ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1624  OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \
1625  OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \
1626 }
1627 
1628 #endif // KMP_OS_WINDOWS
1629 
1630 // ------------------------------------------------------------------------
1631 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1632 ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd
1633 ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd
1634 ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd
1635 ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd
1636 
1637 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
1638 ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd
1639 ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd
1640 
1641 ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd
1642 #if KMP_HAVE_QUAD
1643 ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd
1644 #endif // KMP_HAVE_QUAD
1645 
1646 // Fix for CQ220361 on Windows* OS
1647 #if ( KMP_OS_WINDOWS )
1648  ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1649 #else
1650  ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd
1651 #endif
1652 ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd
1653 ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd
1654 #if KMP_HAVE_QUAD
1655 ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd
1656 #if ( KMP_ARCH_X86 )
1657  ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd
1658  ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
1659 #endif
1660 #endif
1661 
1662 
1663 // ------------------------------------------------------------------------
1664 // Atomic WRITE routines
1665 // ------------------------------------------------------------------------
1666 
1667 #define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1668 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1669  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1670  KMP_XCHG_FIXED##BITS( lhs, rhs ); \
1671 }
1672 // ------------------------------------------------------------------------
1673 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1674 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1675  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1676  KMP_XCHG_REAL##BITS( lhs, rhs ); \
1677 }
1678 
1679 
1680 // ------------------------------------------------------------------------
1681 // Operation on *lhs, rhs using "compare_and_store" routine
1682 // TYPE - operands' type
1683 // BITS - size in bits, used to distinguish low level calls
1684 // OP - operator
1685 // Note: temp_val introduced in order to force the compiler to read
1686 // *lhs only once (w/o it the compiler reads *lhs twice)
1687 #define OP_CMPXCHG_WR(TYPE,BITS,OP) \
1688  { \
1689  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1690  TYPE old_value, new_value; \
1691  temp_val = *lhs; \
1692  old_value = temp_val; \
1693  new_value = rhs; \
1694  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1695  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1696  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1697  { \
1698  KMP_CPU_PAUSE(); \
1699  \
1700  temp_val = *lhs; \
1701  old_value = temp_val; \
1702  new_value = rhs; \
1703  } \
1704  }
1705 
1706 // -------------------------------------------------------------------------
1707 #define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1708 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1709  OP_GOMP_CRITICAL(OP,GOMP_FLAG) \
1710  OP_CMPXCHG_WR(TYPE,BITS,OP) \
1711 }
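
// Illustrative expansion (hand-written sketch, not generated code; trace and
// GOMP-compat paths omitted, and the usual ident_t*/gtid parameter list is
// assumed): on IA-32, where an 8-byte store is not a single atomic instruction,
//     ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 )
// produces roughly:
//
//     void __kmpc_atomic_fixed8_wr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs )
//     {
//         kmp_int64 old_value = *lhs;
//         // retry until the 8-byte CAS installs rhs over the value last observed
//         while ( ! KMP_COMPARE_AND_STORE_ACQ64( lhs, old_value, rhs ) ) {
//             KMP_CPU_PAUSE();
//             old_value = *lhs;                                // re-read and try again
//         }
//     }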
1712 
1713 // ------------------------------------------------------------------------
1714 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1715 // TYPE_ID, OP_ID, TYPE - detailed above
1716 // OP - operator
1717 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1718 #define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
1719 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \
1720  OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \
1721  OP_CRITICAL(OP,LCK_ID) /* send assignment */ \
1722 }
1723 // -------------------------------------------------------------------------
1724 
1725 ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr
1726 ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr
1727 ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr
1728 #if ( KMP_ARCH_X86 )
1729  ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1730 #else
1731  ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr
1732 #endif
1733 
1734 ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr
1735 #if ( KMP_ARCH_X86 )
1736  ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1737 #else
1738  ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr
1739 #endif
1740 
1741 ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr
1742 #if KMP_HAVE_QUAD
1743 ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr
1744 #endif
1745 ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr
1746 ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr
1747 ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr
1748 #if KMP_HAVE_QUAD
1749 ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr
1750 #if ( KMP_ARCH_X86 )
1751  ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
1752  ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
1753 #endif
1754 #endif
1755 
1756 
1757 // ------------------------------------------------------------------------
1758 // Atomic CAPTURE routines
1759 // ------------------------------------------------------------------------
1760 
1761 // Beginning of a definition (provides name, parameters, debug trace)
1762 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned fixed)
1763 // OP_ID - operation identifier (add, sub, mul, ...)
1764 // TYPE - operands' type
1765 #define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \
1766 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \
1767 { \
1768  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
1769  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1770 
1771 // -------------------------------------------------------------------------
1772 // Operation on *lhs, rhs bound by critical section
1773 // OP - operator (it's supposed to contain an assignment)
1774 // LCK_ID - lock identifier
1775 // Note: don't check gtid as it should always be valid
1776 // 1, 2-byte - expect valid parameter, other - check before this macro
1777 #define OP_CRITICAL_CPT(OP,LCK_ID) \
1778  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1779  \
1780  if( flag ) { \
1781  (*lhs) OP rhs; \
1782  new_value = (*lhs); \
1783  } else { \
1784  new_value = (*lhs); \
1785  (*lhs) OP rhs; \
1786  } \
1787  \
1788  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1789  return new_value;
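
// Capture semantics: the extra 'flag' parameter selects which value the caller
// receives; flag != 0 returns *lhs after the update (the { x = x op expr; v = x; }
// form), flag == 0 returns *lhs before it (the { v = x; x = x op expr; } form).
// A plausible lowering (illustrative only) of { v = x; x = x + expr; } for a
// long double x is:
//     v = __kmpc_atomic_float10_add_cpt( id_ref, gtid, &x, expr, 0 );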
1790 
1791 // ------------------------------------------------------------------------
1792 #ifdef KMP_GOMP_COMPAT
1793 #define OP_GOMP_CRITICAL_CPT(OP,FLAG) \
1794  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1795  KMP_CHECK_GTID; \
1796  OP_CRITICAL_CPT( OP##=, 0 ); \
1797  }
1798 #else
1799 #define OP_GOMP_CRITICAL_CPT(OP,FLAG)
1800 #endif /* KMP_GOMP_COMPAT */
1801 
1802 // ------------------------------------------------------------------------
1803 // Operation on *lhs, rhs using "compare_and_store" routine
1804 // TYPE - operands' type
1805 // BITS - size in bits, used to distinguish low level calls
1806 // OP - operator
1807 // Note: temp_val introduced in order to force the compiler to read
1808 // *lhs only once (w/o it the compiler reads *lhs twice)
1809 #define OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1810  { \
1811  TYPE KMP_ATOMIC_VOLATILE temp_val; \
1812  TYPE old_value, new_value; \
1813  temp_val = *lhs; \
1814  old_value = temp_val; \
1815  new_value = old_value OP rhs; \
1816  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1817  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
1818  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
1819  { \
1820  KMP_CPU_PAUSE(); \
1821  \
1822  temp_val = *lhs; \
1823  old_value = temp_val; \
1824  new_value = old_value OP rhs; \
1825  } \
1826  if( flag ) { \
1827  return new_value; \
1828  } else \
1829  return old_value; \
1830  }
1831 
1832 // -------------------------------------------------------------------------
1833 #define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1834 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1835  TYPE new_value; \
1836  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1837  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1838 }
1839 
1840 // -------------------------------------------------------------------------
1841 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1842 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1843  TYPE old_value, new_value; \
1844  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \
1845  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
1846  old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \
1847  if( flag ) { \
1848  return old_value OP rhs; \
1849  } else \
1850  return old_value; \
1851 }
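
// Illustrative expansion (hand-written sketch, not generated code; trace and
// GOMP-compat paths omitted): ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 )
// builds the capture on top of an atomic fetch-and-add, so no retry loop is needed:
//
//     kmp_int32 __kmpc_atomic_fixed4_add_cpt( ident_t *id_ref, int gtid,
//                                             kmp_int32 * lhs, kmp_int32 rhs, int flag )
//     {
//         kmp_int32 old_value = KMP_TEST_THEN_ADD32( lhs, rhs ); // returns the pre-add value
//         return flag ? old_value + rhs                          // captured after the update
//                     : old_value;                               // captured before the update
//     }
//
// For sub_cpt the macro is instantiated with OP == '-', so the addend becomes
// '- rhs' and the flag != 0 result is old_value - rhs.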
1852 // -------------------------------------------------------------------------
1853 
1854 ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt
1855 ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt
1856 ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt
1857 ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt
1858 
1859 ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt
1860 ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt
1861 ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt
1862 ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt
1863 
1864 // ------------------------------------------------------------------------
1865 // Entries definition for integer operands
1866 // TYPE_ID - operands type and size (fixed4, float4)
1867 // OP_ID - operation identifier (add, sub, mul, ...)
1868 // TYPE - operand type
1869 // BITS - size in bits, used to distinguish low level calls
1870 // OP - operator (used in critical section)
1871 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
1872 // ------------------------------------------------------------------------
1873 // Routines for ATOMIC integer operands, other operators
1874 // ------------------------------------------------------------------------
1875 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1876 ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt
1877 ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt
1878 ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt
1879 ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt
1880 ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt
1881 ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt
1882 ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt
1883 ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt
1884 ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt
1885 ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt
1886 ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt
1887 ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt
1888 ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt
1889 ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt
1890 ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt
1891 ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt
1892 ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt
1893 ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt
1894 ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt
1895 ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt
1896 ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt
1897 ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt
1898 ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt
1899 ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt
1900 ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt
1901 ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt
1902 ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt
1903 ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt
1904 ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt
1905 ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt
1906 ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt
1907 ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt
1908 ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt
1909 ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt
1910 ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt
1911 ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt
1912 ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt
1913 ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt
1914 ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt
1915 ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt
1916 ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt
1917 ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt
1918 ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt
1919 ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt
1920 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1921 
1922 // ------------------------------------------------------------------------
1923 // Routines for C/C++ Reduction operators && and ||
1924 // ------------------------------------------------------------------------
1925 
1926 // -------------------------------------------------------------------------
1927 // Operation on *lhs, rhs bound by critical section
1928 // OP - operator (it's supposed to contain an assignment)
1929 // LCK_ID - lock identifier
1930 // Note: don't check gtid as it should always be valid
1931 // 1, 2-byte - expect valid parameter, other - check before this macro
1932 #define OP_CRITICAL_L_CPT(OP,LCK_ID) \
1933  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1934  \
1935  if( flag ) { \
1936  new_value OP rhs; \
1937  } else \
1938  new_value = (*lhs); \
1939  \
1940  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1941 
1942 // ------------------------------------------------------------------------
1943 #ifdef KMP_GOMP_COMPAT
1944 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \
1945  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
1946  KMP_CHECK_GTID; \
1947  OP_CRITICAL_L_CPT( OP, 0 ); \
1948  return new_value; \
1949  }
1950 #else
1951 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
1952 #endif /* KMP_GOMP_COMPAT */
1953 
1954 // ------------------------------------------------------------------------
1955 // Need separate macros for &&, || because there is no combined assignment
1956 #define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
1957 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
1958  TYPE new_value; \
1959  OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \
1960  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
1961 }
1962 
1963 ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt
1964 ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt
1965 ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt
1966 ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt
1967 ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt
1968 ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt
1969 ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt
1970 ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt
1971 
1972 
1973 // -------------------------------------------------------------------------
1974 // Routines for Fortran operators that have no C counterpart:
1975 // MAX, MIN, .EQV., .NEQV.
1976 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
1977 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
1978 // -------------------------------------------------------------------------
1979 
1980 // -------------------------------------------------------------------------
1981 // MIN and MAX need separate macros
1982 // OP - operator used to check whether any action is still needed
1983 #define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
1984  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1985  \
1986  if ( *lhs OP rhs ) { /* still need actions? */ \
1987  old_value = *lhs; \
1988  *lhs = rhs; \
1989  if ( flag ) \
1990  new_value = rhs; \
1991  else \
1992  new_value = old_value; \
1993  } \
1994  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
1995  return new_value; \
1996 
1997 // -------------------------------------------------------------------------
1998 #ifdef KMP_GOMP_COMPAT
1999 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \
2000  if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \
2001  KMP_CHECK_GTID; \
2002  MIN_MAX_CRITSECT_CPT( OP, 0 ); \
2003  }
2004 #else
2005 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
2006 #endif /* KMP_GOMP_COMPAT */
2007 
2008 // -------------------------------------------------------------------------
2009 #define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2010  { \
2011  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2012  /*TYPE old_value; */ \
2013  temp_val = *lhs; \
2014  old_value = temp_val; \
2015  while ( old_value OP rhs && /* still need actions? */ \
2016  ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2017  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2018  *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \
2019  { \
2020  KMP_CPU_PAUSE(); \
2021  temp_val = *lhs; \
2022  old_value = temp_val; \
2023  } \
2024  if( flag ) \
2025  return rhs; \
2026  else \
2027  return old_value; \
2028  }
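
// Note: the loop above keeps retrying only while '*lhs OP rhs' still holds,
// i.e. while rhs would still replace the current value. Once the initial
// '*lhs OP rhs' test in the MIN_MAX_*_CPT wrappers below succeeds, the body of,
// e.g., __kmpc_atomic_fixed4_max_cpt (instantiated with OP == '<') is roughly:
//
//     kmp_int32 old_value = *lhs;
//     while ( old_value < rhs &&                               /* rhs still the new maximum? */
//             ! KMP_COMPARE_AND_STORE_ACQ32( lhs, old_value, rhs ) )
//     {
//         KMP_CPU_PAUSE();
//         old_value = *lhs;                                    /* lost a race; re-check */
//     }
//     return flag ? rhs : old_value;                           /* post- vs. pre-update capture */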
2029 
2030 // -------------------------------------------------------------------------
2031 // 1-byte, 2-byte operands - use critical section
2032 #define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2033 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2034  TYPE new_value, old_value; \
2035  if ( *lhs OP rhs ) { /* need actions? */ \
2036  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2037  MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \
2038  } \
2039  return *lhs; \
2040 }
2041 
2042 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2043 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2044  TYPE new_value, old_value; \
2045  if ( *lhs OP rhs ) { \
2046  GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \
2047  MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \
2048  } \
2049  return *lhs; \
2050 }
2051 
2052 
2053 MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
2054 MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
2055 MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
2056 MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
2057 MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt
2058 MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt
2059 MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
2060 MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
2061 MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
2062 MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
2063 MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
2064 MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
2065 #if KMP_HAVE_QUAD
2066 MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt
2067 MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt
2068 #if ( KMP_ARCH_X86 )
2069  MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt
2070  MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16_cpt
2071 #endif
2072 #endif
2073 
2074 // ------------------------------------------------------------------------
2075 #ifdef KMP_GOMP_COMPAT
2076 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \
2077  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2078  KMP_CHECK_GTID; \
2079  OP_CRITICAL_CPT( OP, 0 ); \
2080  }
2081 #else
2082 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
2083 #endif /* KMP_GOMP_COMPAT */
2084 // ------------------------------------------------------------------------
2085 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2086 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2087  TYPE new_value; \
2088  OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \
2089  OP_CMPXCHG_CPT(TYPE,BITS,OP) \
2090 }
2091 
2092 // ------------------------------------------------------------------------
2093 
2094 ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
2095 ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
2096 ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
2097 ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
2098 ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
2099 ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
2100 ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
2101 ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
2102 
2103 // ------------------------------------------------------------------------
2104 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2105 // TYPE_ID, OP_ID, TYPE - detailed above
2106 // OP - operator
2107 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2108 #define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2109 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2110  TYPE new_value; \
2111  OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \
2112  OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \
2113 }
2114 
2115 // ------------------------------------------------------------------------
2116 
2117 // Workaround for cmplx4. Regular routines with return value don't work
2118 // on Win_32e. Let's return captured values through the additional parameter.
2119 #define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \
2120  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2121  \
2122  if( flag ) { \
2123  (*lhs) OP rhs; \
2124  (*out) = (*lhs); \
2125  } else { \
2126  (*out) = (*lhs); \
2127  (*lhs) OP rhs; \
2128  } \
2129  \
2130  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2131  return;
2132 // ------------------------------------------------------------------------
2133 
2134 #ifdef KMP_GOMP_COMPAT
2135 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \
2136  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2137  KMP_CHECK_GTID; \
2138  OP_CRITICAL_CPT_WRK( OP##=, 0 ); \
2139  }
2140 #else
2141 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
2142 #endif /* KMP_GOMP_COMPAT */
2143 // ------------------------------------------------------------------------
2144 
2145 #define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2146 void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \
2147 { \
2148  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2149  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
2150 // ------------------------------------------------------------------------
2151 
2152 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2153 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2154  OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \
2155  OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \
2156 }
2157 // The end of workaround for cmplx4
2158 
2159 /* ------------------------------------------------------------------------- */
2160 // routines for long double type
2161 ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt
2162 ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt
2163 ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt
2164 ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt
2165 #if KMP_HAVE_QUAD
2166 // routines for _Quad type
2167 ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt
2168 ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt
2169 ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt
2170 ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt
2171 #if ( KMP_ARCH_X86 )
2172  ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt
2173  ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt
2174  ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt
2175  ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt
2176 #endif
2177 #endif
2178 
2179 // routines for complex types
2180 
2181 // cmplx4 routines to return void
2182 ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt
2183 ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt
2184 ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt
2185 ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt
2186 
2187 ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt
2188 ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt
2189 ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt
2190 ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt
2191 ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt
2192 ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt
2193 ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt
2194 ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt
2195 #if KMP_HAVE_QUAD
2196 ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt
2197 ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt
2198 ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt
2199 ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt
2200 #if ( KMP_ARCH_X86 )
2201  ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt
2202  ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt
2203  ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt
2204  ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt
2205 #endif
2206 #endif
2207 
2208 #if OMP_40_ENABLED
2209 
2210 // OpenMP 4.0: capture forms for v = x = expr binop x, i.e. { v = x; x = expr binop x; } and { x = expr binop x; v = x; }, needed for non-commutative operations.
2211 // Supported only on IA-32 architecture and Intel(R) 64
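
// For example, the *_cpt_rev entries below reverse the operand order, so a
// plausible lowering (illustrative only) of { x = expr - x; v = x; } for a
// kmp_int32 x is
//     v = __kmpc_atomic_fixed4_sub_cpt_rev( id_ref, gtid, &x, expr, 1 );
// with flag == 1 requesting the value of x after the update and flag == 0 the
// value before it, as in the forward capture routines above.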
2212 
2213 // -------------------------------------------------------------------------
2214 // Operation on *lhs, rhs bound by critical section
2215 // OP - operator (it's supposed to contain an assignment)
2216 // LCK_ID - lock identifier
2217 // Note: don't check gtid as it should always be valid
2218 // 1, 2-byte - expect valid parameter, other - check before this macro
2219 #define OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2220  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2221  \
2222  if( flag ) { \
2223  /*temp_val = (*lhs);*/\
2224  (*lhs) = (rhs) OP (*lhs); \
2225  new_value = (*lhs); \
2226  } else { \
2227  new_value = (*lhs);\
2228  (*lhs) = (rhs) OP (*lhs); \
2229  } \
2230  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2231  return new_value;
2232 
2233 // ------------------------------------------------------------------------
2234 #ifdef KMP_GOMP_COMPAT
2235 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \
2236  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2237  KMP_CHECK_GTID; \
2238  OP_CRITICAL_CPT_REV( OP, 0 ); \
2239  }
2240 #else
2241 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
2242 #endif /* KMP_GOMP_COMPAT */
2243 
2244 // ------------------------------------------------------------------------
2245 // Operation on *lhs, rhs using "compare_and_store" routine
2246 // TYPE - operands' type
2247 // BITS - size in bits, used to distinguish low level calls
2248 // OP - operator
2249 // Note: temp_val introduced in order to force the compiler to read
2250 // *lhs only once (w/o it the compiler reads *lhs twice)
2251 #define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2252  { \
2253  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2254  TYPE old_value, new_value; \
2255  temp_val = *lhs; \
2256  old_value = temp_val; \
2257  new_value = rhs OP old_value; \
2258  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2259  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2260  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2261  { \
2262  KMP_CPU_PAUSE(); \
2263  \
2264  temp_val = *lhs; \
2265  old_value = temp_val; \
2266  new_value = rhs OP old_value; \
2267  } \
2268  if( flag ) { \
2269  return new_value; \
2270  } else \
2271  return old_value; \
2272  }
2273 
2274 // -------------------------------------------------------------------------
2275 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \
2276 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2277  TYPE new_value; \
2278  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2279  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2280  OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \
2281 }
2282 
2283 
2284 ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev
2285 ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev
2286 ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev
2287 ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev
2288 ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev
2289 ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev
2290 ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev
2291 ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev
2292 ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev
2293 ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev
2294 ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev
2295 ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev
2296 ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev
2297 ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev
2298 ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev
2299 ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev
2300 ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev
2301 ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev
2302 ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev
2303 ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev
2304 ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev
2305 ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev
2306 ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev
2307 ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev
2308 ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev
2309 ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev
2310 ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev
2311 ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev
2312 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2313 
2314 
2315 // ------------------------------------------------------------------------
2316 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2317 // TYPE_ID, OP_ID, TYPE - detailed above
2318 // OP - operator
2319 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2320 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2321 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \
2322  TYPE new_value; \
2323  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2324  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\
2325  OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \
2326  OP_CRITICAL_CPT_REV(OP,LCK_ID) \
2327 }
2328 
2329 
2330 /* ------------------------------------------------------------------------- */
2331 // routines for long double type
2332 ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev
2333 ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev
2334 #if KMP_HAVE_QUAD
2335 // routines for _Quad type
2336 ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev
2337 ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev
2338 #if ( KMP_ARCH_X86 )
2339  ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev
2340  ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev
2341 #endif
2342 #endif
2343 
2344 // routines for complex types
2345 
2346 // ------------------------------------------------------------------------
2347 
2348 // Workaround for cmplx4. Regular routines with return value don't work
2349 // on Win_32e. Let's return captured values through the additional parameter.
2350 #define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2351  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2352  \
2353  if( flag ) { \
2354  (*lhs) = (rhs) OP (*lhs); \
2355  (*out) = (*lhs); \
2356  } else { \
2357  (*out) = (*lhs); \
2358  (*lhs) = (rhs) OP (*lhs); \
2359  } \
2360  \
2361  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2362  return;
2363 // ------------------------------------------------------------------------
2364 
2365 #ifdef KMP_GOMP_COMPAT
2366 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \
2367  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2368  KMP_CHECK_GTID; \
2369  OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \
2370  }
2371 #else
2372 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
2373 #endif /* KMP_GOMP_COMPAT */
2374 // ------------------------------------------------------------------------
2375 
2376 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2377 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \
2378  OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \
2379  OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \
2380 }
2381 // The end of workaround for cmplx4
2382 
2383 
2384 // !!! TODO: check if we need to return void for cmplx4 routines
2385 // cmplx4 routines to return void
2386 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev
2387 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev
2388 
2389 ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev
2390 ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev
2391 ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev
2392 ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev
2393 #if KMP_HAVE_QUAD
2394 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev
2395 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev
2396 #if ( KMP_ARCH_X86 )
2397  ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
2398  ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
2399 #endif
2400 #endif
2401 
2402 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
2403 
2404 #define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2405 TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
2406 { \
2407  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2408  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2409 
2410 #define CRITICAL_SWP(LCK_ID) \
2411  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2412  \
2413  old_value = (*lhs); \
2414  (*lhs) = rhs; \
2415  \
2416  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2417  return old_value;
2418 
2419 // ------------------------------------------------------------------------
2420 #ifdef KMP_GOMP_COMPAT
2421 #define GOMP_CRITICAL_SWP(FLAG) \
2422  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2423  KMP_CHECK_GTID; \
2424  CRITICAL_SWP( 0 ); \
2425  }
2426 #else
2427 #define GOMP_CRITICAL_SWP(FLAG)
2428 #endif /* KMP_GOMP_COMPAT */
2429 
2430 
2431 #define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2432 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2433  TYPE old_value; \
2434  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2435  old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \
2436  return old_value; \
2437 }
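
// Illustrative expansion (hand-written sketch, not generated code; trace and
// GOMP-compat paths omitted): ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 )
// reduces the capture-write form { v = x; x = expr; } to one atomic exchange:
//
//     kmp_int32 __kmpc_atomic_fixed4_swp( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs )
//     {
//         kmp_int32 old_value = KMP_XCHG_FIXED32( lhs, rhs );  // store rhs, return previous value
//         return old_value;                                    // becomes v at the call site
//     }
//
// The CMPXCHG_SWP and CRITICAL_SWP variants cover types the hardware cannot
// exchange in a single instruction.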
2438 // ------------------------------------------------------------------------
2439 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2440 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2441  TYPE old_value; \
2442  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2443  old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \
2444  return old_value; \
2445 }
2446 
2447 // ------------------------------------------------------------------------
2448 #define CMPXCHG_SWP(TYPE,BITS) \
2449  { \
2450  TYPE KMP_ATOMIC_VOLATILE temp_val; \
2451  TYPE old_value, new_value; \
2452  temp_val = *lhs; \
2453  old_value = temp_val; \
2454  new_value = rhs; \
2455  while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2456  *VOLATILE_CAST(kmp_int##BITS *) &old_value, \
2457  *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \
2458  { \
2459  KMP_CPU_PAUSE(); \
2460  \
2461  temp_val = *lhs; \
2462  old_value = temp_val; \
2463  new_value = rhs; \
2464  } \
2465  return old_value; \
2466  }
2467 
2468 // -------------------------------------------------------------------------
2469 #define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \
2470 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2471  TYPE old_value; \
2472  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2473  CMPXCHG_SWP(TYPE,BITS) \
2474 }
2475 
2476 ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp
2477 ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp
2478 ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp
2479 
2480 ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp
2481 
2482 #if ( KMP_ARCH_X86 )
2483  ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2484  ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2485 #else
2486  ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp
2487  ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp
2488 #endif
2489 
2490 // ------------------------------------------------------------------------
2491 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2492 #define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \
2493 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \
2494  TYPE old_value; \
2495  GOMP_CRITICAL_SWP(GOMP_FLAG) \
2496  CRITICAL_SWP(LCK_ID) \
2497 }
2498 
2499 // ------------------------------------------------------------------------
2500 
2501 // !!! TODO: check if we need to return void for cmplx4 routines
2502 // Workaround for cmplx4. Regular routines with return value don't work
2503 // on Win_32e. Let's return captured values through the additional parameter.
2504 
2505 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2506 void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \
2507 { \
2508  KMP_DEBUG_ASSERT( __kmp_init_serial ); \
2509  KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2510 
2511 
2512 #define CRITICAL_SWP_WRK(LCK_ID) \
2513  __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2514  \
2515  tmp = (*lhs); \
2516  (*lhs) = (rhs); \
2517  (*out) = tmp; \
2518  __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \
2519  return;
2520 
2521 // ------------------------------------------------------------------------
2522 
2523 #ifdef KMP_GOMP_COMPAT
2524 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
2525  if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \
2526  KMP_CHECK_GTID; \
2527  CRITICAL_SWP_WRK( 0 ); \
2528  }
2529 #else
2530 #define GOMP_CRITICAL_SWP_WRK(FLAG)
2531 #endif /* KMP_GOMP_COMPAT */
2532 // ------------------------------------------------------------------------
2533 
2534 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \
2535 ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \
2536  TYPE tmp; \
2537  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
2538  CRITICAL_SWP_WRK(LCK_ID) \
2539 }
2540 // The end of workaround for cmplx4
2541 
2542 
2543 ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp
2544 #if KMP_HAVE_QUAD
2545 ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp
2546 #endif
2547 // cmplx4 routine to return void
2548 ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2549 
2550 //ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp
2551 
2552 
2553 ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp
2554 ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp
2555 #if KMP_HAVE_QUAD
2556 ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp
2557 #if ( KMP_ARCH_X86 )
2558  ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp
2559  ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp
2560 #endif
2561 #endif
2562 
2563 
2564 // End of OpenMP 4.0 Capture
2565 
2566 #endif //OMP_40_ENABLED
2567 
2568 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
2569 
2570 
2571 #undef OP_CRITICAL
2572 
2573 /* ------------------------------------------------------------------------ */
2574 /* Generic atomic routines */
2575 /* ------------------------------------------------------------------------ */
2576 
2577 void
2578 __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2579 {
2580  KMP_DEBUG_ASSERT( __kmp_init_serial );
2581 
2582  if (
2583 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2584  FALSE /* must use lock */
2585 #else
2586  TRUE
2587 #endif
2588  )
2589  {
2590  kmp_int8 old_value, new_value;
2591 
2592  old_value = *(kmp_int8 *) lhs;
2593  (*f)( &new_value, &old_value, rhs );
2594 
2595  /* TODO: Should this be acquire or release? */
2596  while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
2597  *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
2598  {
2599  KMP_CPU_PAUSE();
2600 
2601  old_value = *(kmp_int8 *) lhs;
2602  (*f)( &new_value, &old_value, rhs );
2603  }
2604 
2605  return;
2606  }
2607  else {
2608  //
2609  // All 1-byte data is of integer data type.
2610  //
2611 
2612 #ifdef KMP_GOMP_COMPAT
2613  if ( __kmp_atomic_mode == 2 ) {
2614  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2615  }
2616  else
2617 #endif /* KMP_GOMP_COMPAT */
2618  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2619 
2620  (*f)( lhs, lhs, rhs );
2621 
2622 #ifdef KMP_GOMP_COMPAT
2623  if ( __kmp_atomic_mode == 2 ) {
2624  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2625  }
2626  else
2627 #endif /* KMP_GOMP_COMPAT */
2628  __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2629  }
2630 }
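
// Usage sketch (hypothetical caller, not part of the library): the generic entry
// points take a routine f( result, a, b ) that combines *a with *b into *result;
// __kmpc_atomic_1 makes that update atomic via the CAS loop above or under
// __kmp_atomic_lock_1i. For a 1-byte add the callback could look like:
//
//     static void add_char( void *result, void *a, void *b )
//     {
//         *(char *) result = *(char *) a + *(char *) b;        // plain, non-atomic combine
//     }
//     ...
//     char x = 0, inc = 5;
//     __kmpc_atomic_1( id_ref, gtid, &x, &inc, add_char );     // atomically performs x = x + inc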
2631 
2632 void
2633 __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2634 {
2635  if (
2636 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2637  FALSE /* must use lock */
2638 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2639  TRUE /* no alignment problems */
2640 #else
2641  ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */
2642 #endif
2643  )
2644  {
2645  kmp_int16 old_value, new_value;
2646 
2647  old_value = *(kmp_int16 *) lhs;
2648  (*f)( &new_value, &old_value, rhs );
2649 
2650  /* TODO: Should this be acquire or release? */
2651  while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
2652  *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
2653  {
2654  KMP_CPU_PAUSE();
2655 
2656  old_value = *(kmp_int16 *) lhs;
2657  (*f)( &new_value, &old_value, rhs );
2658  }
2659 
2660  return;
2661  }
2662  else {
2663  //
2664  // All 2-byte data is of integer data type.
2665  //
2666 
2667 #ifdef KMP_GOMP_COMPAT
2668  if ( __kmp_atomic_mode == 2 ) {
2669  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2670  }
2671  else
2672 #endif /* KMP_GOMP_COMPAT */
2673  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2674 
2675  (*f)( lhs, lhs, rhs );
2676 
2677 #ifdef KMP_GOMP_COMPAT
2678  if ( __kmp_atomic_mode == 2 ) {
2679  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2680  }
2681  else
2682 #endif /* KMP_GOMP_COMPAT */
2683  __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2684  }
2685 }
2686 
2687 void
2688 __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2689 {
2690  KMP_DEBUG_ASSERT( __kmp_init_serial );
2691 
2692  if (
2693  //
2694  // FIXME: On the IA-32 architecture, gcc uses cmpxchg only for 4-byte ints,
2695  // so GOMP compatibility is broken if this routine is called for floats.
2696  //
2697 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2698  TRUE /* no alignment problems */
2699 #else
2700  ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */
2701 #endif
2702  )
2703  {
2704  kmp_int32 old_value, new_value;
2705 
2706  old_value = *(kmp_int32 *) lhs;
2707  (*f)( &new_value, &old_value, rhs );
2708 
2709  /* TODO: Should this be acquire or release? */
2710  while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
2711  *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
2712  {
2713  KMP_CPU_PAUSE();
2714 
2715  old_value = *(kmp_int32 *) lhs;
2716  (*f)( &new_value, &old_value, rhs );
2717  }
2718 
2719  return;
2720  }
2721  else {
2722  //
2723  // Use __kmp_atomic_lock_4i for all 4-byte data,
2724  // even if it isn't of integer data type.
2725  //
2726 
2727 #ifdef KMP_GOMP_COMPAT
2728  if ( __kmp_atomic_mode == 2 ) {
2729  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2730  }
2731  else
2732 #endif /* KMP_GOMP_COMPAT */
2733  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2734 
2735  (*f)( lhs, lhs, rhs );
2736 
2737 #ifdef KMP_GOMP_COMPAT
2738  if ( __kmp_atomic_mode == 2 ) {
2739  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2740  }
2741  else
2742 #endif /* KMP_GOMP_COMPAT */
2743  __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2744  }
2745 }
2746 
2747 void
2748 __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2749 {
2750  KMP_DEBUG_ASSERT( __kmp_init_serial );
2751  if (
2752 
2753 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2754  FALSE /* must use lock */
2755 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2756  TRUE /* no alignment problems */
2757 #else
2758  ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */
2759 #endif
2760  )
2761  {
2762  kmp_int64 old_value, new_value;
2763 
2764  old_value = *(kmp_int64 *) lhs;
2765  (*f)( &new_value, &old_value, rhs );
2766  /* TODO: Should this be acquire or release? */
2767  while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
2768  *(kmp_int64 *) &old_value,
2769  *(kmp_int64 *) &new_value ) )
2770  {
2771  KMP_CPU_PAUSE();
2772 
2773  old_value = *(kmp_int64 *) lhs;
2774  (*f)( &new_value, &old_value, rhs );
2775  }
2776 
2777  return;
2778  } else {
2779  //
2780  // Use __kmp_atomic_lock_8i for all 8-byte data,
2781  // even if it isn't of integer data type.
2782  //
2783 
2784 #ifdef KMP_GOMP_COMPAT
2785  if ( __kmp_atomic_mode == 2 ) {
2786  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2787  }
2788  else
2789 #endif /* KMP_GOMP_COMPAT */
2790  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2791 
2792  (*f)( lhs, lhs, rhs );
2793 
2794 #ifdef KMP_GOMP_COMPAT
2795  if ( __kmp_atomic_mode == 2 ) {
2796  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2797  }
2798  else
2799 #endif /* KMP_GOMP_COMPAT */
2800  __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2801  }
2802 }
2803 
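/*
 * Operands wider than 8 bytes (long double, _Quad, and the complex types)
 * have no lock-free path in these generic routines; they always serialize
 * on the matching per-type lock, or on the global lock in GOMP
 * compatibility mode.
 */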
2804 void
2805 __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2806 {
2807  KMP_DEBUG_ASSERT( __kmp_init_serial );
2808 
2809 #ifdef KMP_GOMP_COMPAT
2810  if ( __kmp_atomic_mode == 2 ) {
2811  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2812  }
2813  else
2814 #endif /* KMP_GOMP_COMPAT */
2815  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2816 
2817  (*f)( lhs, lhs, rhs );
2818 
2819 #ifdef KMP_GOMP_COMPAT
2820  if ( __kmp_atomic_mode == 2 ) {
2821  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2822  }
2823  else
2824 #endif /* KMP_GOMP_COMPAT */
2825  __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
2826 }
2827 
2828 void
2829 __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2830 {
2831  KMP_DEBUG_ASSERT( __kmp_init_serial );
2832 
2833 #ifdef KMP_GOMP_COMPAT
2834  if ( __kmp_atomic_mode == 2 ) {
2835  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2836  }
2837  else
2838 #endif /* KMP_GOMP_COMPAT */
2839  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2840 
2841  (*f)( lhs, lhs, rhs );
2842 
2843 #ifdef KMP_GOMP_COMPAT
2844  if ( __kmp_atomic_mode == 2 ) {
2845  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2846  }
2847  else
2848 #endif /* KMP_GOMP_COMPAT */
2849  __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
2850 }
2851 
2852 void
2853 __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2854 {
2855  KMP_DEBUG_ASSERT( __kmp_init_serial );
2856 
2857 #ifdef KMP_GOMP_COMPAT
2858  if ( __kmp_atomic_mode == 2 ) {
2859  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2860  }
2861  else
2862 #endif /* KMP_GOMP_COMPAT */
2863  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2864 
2865  (*f)( lhs, lhs, rhs );
2866 
2867 #ifdef KMP_GOMP_COMPAT
2868  if ( __kmp_atomic_mode == 2 ) {
2869  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2870  }
2871  else
2872 #endif /* KMP_GOMP_COMPAT */
2873  __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
2874 }
2875 
2876 void
2877 __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2878 {
2879  KMP_DEBUG_ASSERT( __kmp_init_serial );
2880 
2881 #ifdef KMP_GOMP_COMPAT
2882  if ( __kmp_atomic_mode == 2 ) {
2883  __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2884  }
2885  else
2886 #endif /* KMP_GOMP_COMPAT */
2887  __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2888 
2889  (*f)( lhs, lhs, rhs );
2890 
2891 #ifdef KMP_GOMP_COMPAT
2892  if ( __kmp_atomic_mode == 2 ) {
2893  __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2894  }
2895  else
2896 #endif /* KMP_GOMP_COMPAT */
2897  __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
2898 }
2899 
2900 // AC: These are the same two routines as GOMP_atomic_start/end, but they are called by our compiler;
2901 // they are duplicated so that pure Intel code does not reference third-party names.
2902 // TODO: consider adding a GTID parameter after consultation with Ernesto/Xinmin.
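/*
 * Illustrative sketch only (hypothetical compiler output; the struct, field
 * and variable names are made up): when no sized entry point fits, the
 * compiler can serialize the whole update under the global atomic lock.
 *
 *     // possible lowering of:  #pragma omp atomic
 *     //                        s.counter += delta;
 *     __kmpc_atomic_start();
 *     s.counter += delta;
 *     __kmpc_atomic_end();
 */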
2903 void
2904 __kmpc_atomic_start(void)
2905 {
2906  int gtid = __kmp_entry_gtid();
2907  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
2908  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
2909 }
2910 
2911 
2912 void
2913 __kmpc_atomic_end(void)
2914 {
2915  int gtid = __kmp_get_gtid();
2916  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
2917  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
2918 }
2919 
2920 /* ------------------------------------------------------------------------ */
2921 /* ------------------------------------------------------------------------ */
2926 // end of file