/*
 * kmp_lock.h -- lock header file
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_LOCK_H
#define KMP_LOCK_H

#include <limits.h> // CHAR_BIT
#include <stddef.h> // offsetof

#include "kmp_os.h"
#include "kmp_debug.h"

#ifdef __cplusplus
#include <atomic>

extern "C" {
#endif // __cplusplus

// ----------------------------------------------------------------------------
// Have to copy these definitions from kmp.h because kmp.h cannot be included
// due to circular dependencies.  Will undef these at end of file.

#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
#define KMP_GTID_DNE (-2)

// Forward declaration of ident and ident_t

struct ident;
typedef struct ident ident_t;

// End of copied code.
// ----------------------------------------------------------------------------

//
// We need to know the size of the area we can assume that the compiler(s)
// allocated for objects of type omp_lock_t and omp_nest_lock_t.  The Intel
// compiler always allocates a pointer-sized area, as does Visual Studio.
//
// gcc, however, only allocates 4 bytes for regular locks, even on 64-bit
// Intel archs.  It allocates at least 8 bytes for nested locks (more on
// recent versions), but we are bounded by the pointer-sized chunks that
// the Intel compiler allocates.
//

#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT)
# define OMP_LOCK_T_SIZE        sizeof(int)
# define OMP_NEST_LOCK_T_SIZE   sizeof(void *)
#else
# define OMP_LOCK_T_SIZE        sizeof(void *)
# define OMP_NEST_LOCK_T_SIZE   sizeof(void *)
#endif

//
// The Intel compiler allocates a 32-byte chunk for a critical section.
// Both gcc and Visual Studio only allocate enough space for a pointer.
// Sometimes we know that the space was allocated by the Intel compiler.
//
#define OMP_CRITICAL_SIZE       sizeof(void *)
#define INTEL_CRITICAL_SIZE     32

//
// lock flags
//
typedef kmp_uint32 kmp_lock_flags_t;

#define kmp_lf_critical_section 1

//
// When a lock table is used, the indices are of kmp_lock_index_t
//
typedef kmp_uint32 kmp_lock_index_t;

//
// When memory allocated for locks is on the lock pool (free list),
// it is treated as structs of this type.
//
struct kmp_lock_pool {
    union kmp_user_lock *next;
    kmp_lock_index_t index;
};

typedef struct kmp_lock_pool kmp_lock_pool_t;

extern void __kmp_validate_locks( void );

// ----------------------------------------------------------------------------
//
//  There are 5 lock implementations:
//
//       1. Test and set locks.
//       2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core architecture)
//       3. Ticket (Lamport bakery) locks.
//       4. Queuing locks (with separate spin fields).
//       5. DRDPA (Dynamically Reconfigurable Distributed Polling Area) locks
//
//   and 3 lock purposes:
//
//       1. Bootstrap locks -- Used for a few locks available at library startup-shutdown time.
//          These do not require non-negative global thread ID's.
//       2. Internal RTL locks -- Used everywhere else in the RTL
//       3. User locks (includes critical sections)
//
// ----------------------------------------------------------------------------

// ============================================================================
// Lock implementations.
// ============================================================================ // ---------------------------------------------------------------------------- // Test and set locks. // // Non-nested test and set locks differ from the other lock kinds (except // futex) in that we use the memory allocated by the compiler for the lock, // rather than a pointer to it. // // On lin32, lin_32e, and win_32, the space allocated may be as small as 4 // bytes, so we have to use a lock table for nested locks, and avoid accessing // the depth_locked field for non-nested locks. // // Information normally available to the tools, such as lock location, // lock usage (normal lock vs. critical section), etc. is not available with // test and set locks. // ---------------------------------------------------------------------------- struct kmp_base_tas_lock { volatile kmp_int32 poll; // 0 => unlocked // locked: (gtid+1) of owning thread kmp_int32 depth_locked; // depth locked, for nested locks only }; typedef struct kmp_base_tas_lock kmp_base_tas_lock_t; union kmp_tas_lock { kmp_base_tas_lock_t lk; kmp_lock_pool_t pool; // make certain struct is large enough double lk_align; // use worst case alignment // no cache line padding }; typedef union kmp_tas_lock kmp_tas_lock_t; // // Static initializer for test and set lock variables. Usage: // kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock ); // #define KMP_TAS_LOCK_INITIALIZER( lock ) { { 0, 0 } } extern int __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); extern int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); extern int __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); extern void __kmp_init_tas_lock( kmp_tas_lock_t *lck ); extern void __kmp_destroy_tas_lock( kmp_tas_lock_t *lck ); extern int __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); extern int __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); extern int __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck ); extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ); #define KMP_LOCK_RELEASED 1 #define KMP_LOCK_STILL_HELD 0 #define KMP_LOCK_ACQUIRED_FIRST 1 #define KMP_LOCK_ACQUIRED_NEXT 0 #define KMP_USE_FUTEX (KMP_OS_LINUX && !KMP_OS_CNK && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)) #if KMP_USE_FUTEX // ---------------------------------------------------------------------------- // futex locks. futex locks are only available on Linux* OS. // // Like non-nested test and set lock, non-nested futex locks use the memory // allocated by the compiler for the lock, rather than a pointer to it. // // Information normally available to the tools, such as lock location, // lock usage (normal lock vs. critical section), etc. is not available with // test and set locks. With non-nested futex locks, the lock owner is not // even available. 
// ----------------------------------------------------------------------------

struct kmp_base_futex_lock {
    volatile kmp_int32 poll;    // 0 => unlocked
                                // 2*(gtid+1) of owning thread, 0 if unlocked
                                // locked: (gtid+1) of owning thread
    kmp_int32 depth_locked;     // depth locked, for nested locks only
};

typedef struct kmp_base_futex_lock kmp_base_futex_lock_t;

union kmp_futex_lock {
    kmp_base_futex_lock_t lk;
    kmp_lock_pool_t pool;       // make certain struct is large enough
    double lk_align;            // use worst case alignment
                                // no cache line padding
};

typedef union kmp_futex_lock kmp_futex_lock_t;

//
// Static initializer for futex lock variables. Usage:
//    kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock );
//
#define KMP_FUTEX_LOCK_INITIALIZER( lock ) { { 0, 0 } }

extern int __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_futex_lock( kmp_futex_lock_t *lck );
extern void __kmp_destroy_futex_lock( kmp_futex_lock_t *lck );

extern int __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck );
extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck );

#endif // KMP_USE_FUTEX

// ----------------------------------------------------------------------------
// Ticket locks.
// ----------------------------------------------------------------------------

#ifdef __cplusplus

#ifdef _MSC_VER
// MSVC won't allow use of std::atomic<> in a union since it has non-trivial copy constructor.
struct kmp_base_ticket_lock {
    // `initialized' must be the first entry in the lock data structure!
    std::atomic_bool    initialized;
    volatile union kmp_ticket_lock *self;   // points to the lock union
    ident_t const *     location;           // Source code location of omp_init_lock().
    std::atomic_uint    next_ticket;        // ticket number to give to next thread which acquires
    std::atomic_uint    now_serving;        // ticket number for thread which holds the lock
    std::atomic_int     owner_id;           // (gtid+1) of owning thread, 0 if unlocked
    std::atomic_int     depth_locked;       // depth locked, for nested locks only
    kmp_lock_flags_t    flags;              // lock specifics, e.g. critical section lock
};
#else
struct kmp_base_ticket_lock {
    // `initialized' must be the first entry in the lock data structure!
    std::atomic<bool>     initialized;
    volatile union kmp_ticket_lock *self;   // points to the lock union
    ident_t const *       location;         // Source code location of omp_init_lock().
    std::atomic<unsigned> next_ticket;      // ticket number to give to next thread which acquires
    std::atomic<unsigned> now_serving;      // ticket number for thread which holds the lock
    std::atomic<int>      owner_id;         // (gtid+1) of owning thread, 0 if unlocked
    std::atomic<int>      depth_locked;     // depth locked, for nested locks only
    kmp_lock_flags_t      flags;            // lock specifics, e.g. critical section lock
};
#endif

#else // __cplusplus

struct kmp_base_ticket_lock;

#endif // !__cplusplus

typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t;

union KMP_ALIGN_CACHE kmp_ticket_lock {
    kmp_base_ticket_lock_t lk;  // This field must be first to allow static initializing.
kmp_lock_pool_t pool; double lk_align; // use worst case alignment char lk_pad[ KMP_PAD( kmp_base_ticket_lock_t, CACHE_LINE ) ]; }; typedef union kmp_ticket_lock kmp_ticket_lock_t; // // Static initializer for simple ticket lock variables. Usage: // kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock ); // Note the macro argument. It is important to make var properly initialized. // #define KMP_TICKET_LOCK_INITIALIZER( lock ) { { ATOMIC_VAR_INIT(true), \ &(lock), \ NULL, \ ATOMIC_VAR_INIT(0U), \ ATOMIC_VAR_INIT(0U), \ ATOMIC_VAR_INIT(0), \ ATOMIC_VAR_INIT(-1) } } extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); extern int __kmp_test_ticket_lock_with_cheks( kmp_ticket_lock_t *lck, kmp_int32 gtid ); extern int __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); extern void __kmp_init_ticket_lock( kmp_ticket_lock_t *lck ); extern void __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ); extern int __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); extern int __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); extern int __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); extern void __kmp_init_nested_ticket_lock( kmp_ticket_lock_t *lck ); extern void __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ); // ---------------------------------------------------------------------------- // Queuing locks. // ---------------------------------------------------------------------------- #if KMP_USE_ADAPTIVE_LOCKS struct kmp_adaptive_lock_info; typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t; #if KMP_DEBUG_ADAPTIVE_LOCKS struct kmp_adaptive_lock_statistics { /* So we can get stats from locks that haven't been destroyed. */ kmp_adaptive_lock_info_t * next; kmp_adaptive_lock_info_t * prev; /* Other statistics */ kmp_uint32 successfulSpeculations; kmp_uint32 hardFailedSpeculations; kmp_uint32 softFailedSpeculations; kmp_uint32 nonSpeculativeAcquires; kmp_uint32 nonSpeculativeAcquireAttempts; kmp_uint32 lemmingYields; }; typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t; extern void __kmp_print_speculative_stats(); extern void __kmp_init_speculative_stats(); #endif // KMP_DEBUG_ADAPTIVE_LOCKS struct kmp_adaptive_lock_info { /* Values used for adaptivity. * Although these are accessed from multiple threads we don't access them atomically, * because if we miss updates it probably doesn't matter much. (It just affects our * decision about whether to try speculation on the lock). */ kmp_uint32 volatile badness; kmp_uint32 volatile acquire_attempts; /* Parameters of the lock. */ kmp_uint32 max_badness; kmp_uint32 max_soft_retries; #if KMP_DEBUG_ADAPTIVE_LOCKS kmp_adaptive_lock_statistics_t volatile stats; #endif }; #endif // KMP_USE_ADAPTIVE_LOCKS struct kmp_base_queuing_lock { // `initialized' must be the first entry in the lock data structure! volatile union kmp_queuing_lock *initialized; // Points to the lock union if in initialized state. ident_t const * location; // Source code location of omp_init_lock(). KMP_ALIGN( 8 ) // tail_id must be 8-byte aligned! 
volatile kmp_int32 tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty // Must be no padding here since head/tail used in 8-byte CAS volatile kmp_int32 head_id; // (gtid+1) of thread at head of wait queue, 0 if empty // Decl order assumes little endian // bakery-style lock volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked kmp_int32 depth_locked; // depth locked, for nested locks only kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock }; typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t; KMP_BUILD_ASSERT( offsetof( kmp_base_queuing_lock_t, tail_id ) % 8 == 0 ); union KMP_ALIGN_CACHE kmp_queuing_lock { kmp_base_queuing_lock_t lk; // This field must be first to allow static initializing. kmp_lock_pool_t pool; double lk_align; // use worst case alignment char lk_pad[ KMP_PAD( kmp_base_queuing_lock_t, CACHE_LINE ) ]; }; typedef union kmp_queuing_lock kmp_queuing_lock_t; extern int __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); extern int __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); extern int __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); extern void __kmp_init_queuing_lock( kmp_queuing_lock_t *lck ); extern void __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ); extern int __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); extern int __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); extern int __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); extern void __kmp_init_nested_queuing_lock( kmp_queuing_lock_t *lck ); extern void __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ); #if KMP_USE_ADAPTIVE_LOCKS // ---------------------------------------------------------------------------- // Adaptive locks. // ---------------------------------------------------------------------------- struct kmp_base_adaptive_lock { kmp_base_queuing_lock qlk; KMP_ALIGN(CACHE_LINE) kmp_adaptive_lock_info_t adaptive; // Information for the speculative adaptive lock }; typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t; union KMP_ALIGN_CACHE kmp_adaptive_lock { kmp_base_adaptive_lock_t lk; kmp_lock_pool_t pool; double lk_align; char lk_pad[ KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE) ]; }; typedef union kmp_adaptive_lock kmp_adaptive_lock_t; # define GET_QLK_PTR(l) ((kmp_queuing_lock_t *) & (l)->lk.qlk) #endif // KMP_USE_ADAPTIVE_LOCKS // ---------------------------------------------------------------------------- // DRDPA ticket locks. // ---------------------------------------------------------------------------- struct kmp_base_drdpa_lock { // // All of the fields on the first cache line are only written when // initializing or reconfiguring the lock. These are relatively rare // operations, so data from the first cache line will usually stay // resident in the cache of each thread trying to acquire the lock. // // initialized must be the first entry in the lock data structure! // KMP_ALIGN_CACHE volatile union kmp_drdpa_lock * initialized; // points to the lock union if in initialized state ident_t const * location; // Source code location of omp_init_lock(). 
    volatile struct kmp_lock_poll {
        kmp_uint64 poll;
    } * volatile polls;
    volatile kmp_uint64 mask;                   // is 2**num_polls-1 for mod op
    kmp_uint64 cleanup_ticket;                  // thread with cleanup ticket
    volatile struct kmp_lock_poll * old_polls;  // will deallocate old_polls
    kmp_uint32 num_polls;                       // must be power of 2

    //
    // next_ticket needs to exist in a separate cache line, as it is
    // invalidated every time a thread takes a new ticket.
    //
    KMP_ALIGN_CACHE

    volatile kmp_uint64 next_ticket;

    //
    // now_serving is used to store our ticket value while we hold the lock.
    // It has a slightly different meaning in the DRDPA ticket locks (where
    // it is written by the acquiring thread) than it does in the simple
    // ticket locks (where it is written by the releasing thread).
    //
    // Since now_serving is only read and written in the critical section,
    // it is non-volatile, but it needs to exist on a separate cache line,
    // as it is invalidated at every lock acquire.
    //
    // Likewise, the vars used for nested locks (owner_id and depth_locked)
    // are only written by the thread owning the lock, so they are put in
    // this cache line.  owner_id is read by other threads, so it must be
    // declared volatile.
    //
    KMP_ALIGN_CACHE

    kmp_uint64 now_serving;                     // doesn't have to be volatile
    volatile kmp_uint32 owner_id;               // (gtid+1) of owning thread, 0 if unlocked
    kmp_int32 depth_locked;                     // depth locked
    kmp_lock_flags_t flags;                     // lock specifics, e.g. critical section lock
};

typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;

union KMP_ALIGN_CACHE kmp_drdpa_lock {
    kmp_base_drdpa_lock_t lk;   // This field must be first to allow static initializing.
    kmp_lock_pool_t pool;
    double lk_align;            // use worst case alignment
    char lk_pad[ KMP_PAD( kmp_base_drdpa_lock_t, CACHE_LINE ) ];
};

typedef union kmp_drdpa_lock kmp_drdpa_lock_t;

extern int __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck );
extern void __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck );

extern int __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern int __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern int __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid );
extern void __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t *lck );
extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck );

// ============================================================================
// Lock purposes.
// ============================================================================

// ----------------------------------------------------------------------------
// Bootstrap locks.
// ----------------------------------------------------------------------------

// Bootstrap locks -- very few locks used at library initialization time.
// Bootstrap locks are currently implemented as ticket locks.
// They could also be implemented as test and set locks, but cannot be
// implemented with other lock kinds as they require gtids which are not
// available at initialization time.
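//
// Illustrative usage sketch (comment only, not part of the API surface; the
// names my_bootstrap_lock and some_startup_routine below are hypothetical).
// A bootstrap lock is statically initialized and used without a gtid, which
// is why the wrappers below pass KMP_GTID_DNE:
//
//    static kmp_bootstrap_lock_t my_bootstrap_lock =
//        KMP_BOOTSTRAP_LOCK_INITIALIZER( my_bootstrap_lock );
//
//    static void some_startup_routine( void ) {
//        __kmp_acquire_bootstrap_lock( &my_bootstrap_lock );  // no gtid needed
//        /* ... touch library startup/shutdown state ... */
//        __kmp_release_bootstrap_lock( &my_bootstrap_lock );
//    }
//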
typedef kmp_ticket_lock_t kmp_bootstrap_lock_t; #define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) ) static inline int __kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck ) { return __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE ); } static inline int __kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck ) { return __kmp_test_ticket_lock( lck, KMP_GTID_DNE ); } static inline void __kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck ) { __kmp_release_ticket_lock( lck, KMP_GTID_DNE ); } static inline void __kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck ) { __kmp_init_ticket_lock( lck ); } static inline void __kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck ) { __kmp_destroy_ticket_lock( lck ); } // ---------------------------------------------------------------------------- // Internal RTL locks. // ---------------------------------------------------------------------------- // // Internal RTL locks are also implemented as ticket locks, for now. // // FIXME - We should go through and figure out which lock kind works best for // each internal lock, and use the type declaration and function calls for // that explicit lock kind (and get rid of this section). // typedef kmp_ticket_lock_t kmp_lock_t; static inline int __kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid ) { return __kmp_acquire_ticket_lock( lck, gtid ); } static inline int __kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid ) { return __kmp_test_ticket_lock( lck, gtid ); } static inline void __kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid ) { __kmp_release_ticket_lock( lck, gtid ); } static inline void __kmp_init_lock( kmp_lock_t *lck ) { __kmp_init_ticket_lock( lck ); } static inline void __kmp_destroy_lock( kmp_lock_t *lck ) { __kmp_destroy_ticket_lock( lck ); } // ---------------------------------------------------------------------------- // User locks. // ---------------------------------------------------------------------------- // // Do not allocate objects of type union kmp_user_lock!!! // This will waste space unless __kmp_user_lock_kind == lk_drdpa. // Instead, check the value of __kmp_user_lock_kind and allocate objects of // the type of the appropriate union member, and cast their addresses to // kmp_user_lock_p. // enum kmp_lock_kind { lk_default = 0, lk_tas, #if KMP_USE_FUTEX lk_futex, #endif #if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX lk_hle, lk_rtm, #endif lk_ticket, lk_queuing, lk_drdpa, #if KMP_USE_ADAPTIVE_LOCKS lk_adaptive #endif // KMP_USE_ADAPTIVE_LOCKS }; typedef enum kmp_lock_kind kmp_lock_kind_t; extern kmp_lock_kind_t __kmp_user_lock_kind; union kmp_user_lock { kmp_tas_lock_t tas; #if KMP_USE_FUTEX kmp_futex_lock_t futex; #endif kmp_ticket_lock_t ticket; kmp_queuing_lock_t queuing; kmp_drdpa_lock_t drdpa; #if KMP_USE_ADAPTIVE_LOCKS kmp_adaptive_lock_t adaptive; #endif // KMP_USE_ADAPTIVE_LOCKS kmp_lock_pool_t pool; }; typedef union kmp_user_lock *kmp_user_lock_p; #if ! 
KMP_USE_DYNAMIC_LOCK extern size_t __kmp_base_user_lock_size; extern size_t __kmp_user_lock_size; extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ); static inline kmp_int32 __kmp_get_user_lock_owner( kmp_user_lock_p lck ) { KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL ); return ( *__kmp_get_user_lock_owner_ )( lck ); } extern int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) #define __kmp_acquire_user_lock_with_checks(lck,gtid) \ if (__kmp_user_lock_kind == lk_tas) { \ if ( __kmp_env_consistency_check ) { \ char const * const func = "omp_set_lock"; \ if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) \ && lck->tas.lk.depth_locked != -1 ) { \ KMP_FATAL( LockNestableUsedAsSimple, func ); \ } \ if ( ( gtid >= 0 ) && ( lck->tas.lk.poll - 1 == gtid ) ) { \ KMP_FATAL( LockIsAlreadyOwned, func ); \ } \ } \ if ( ( lck->tas.lk.poll != 0 ) || \ ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ kmp_uint32 spins; \ KMP_FSYNC_PREPARE( lck ); \ KMP_INIT_YIELD( spins ); \ if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ KMP_YIELD( TRUE ); \ } else { \ KMP_YIELD_SPIN( spins ); \ } \ while ( ( lck->tas.lk.poll != 0 ) || \ ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ KMP_YIELD( TRUE ); \ } else { \ KMP_YIELD_SPIN( spins ); \ } \ } \ } \ KMP_FSYNC_ACQUIRED( lck ); \ } else { \ KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); \ ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); \ } #else static inline int __kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); return ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); } #endif extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) #include "kmp_i18n.h" /* AC: KMP_FATAL definition */ extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */ static inline int __kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { if ( __kmp_user_lock_kind == lk_tas ) { if ( __kmp_env_consistency_check ) { char const * const func = "omp_test_lock"; if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) && lck->tas.lk.depth_locked != -1 ) { KMP_FATAL( LockNestableUsedAsSimple, func ); } } return ( ( lck->tas.lk.poll == 0 ) && KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ); } else { KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid ); } } #else static inline int __kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid ); } #endif extern int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); static inline void __kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL ); ( *__kmp_release_user_lock_with_checks_ ) ( lck, gtid ); } extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ); static inline void __kmp_init_user_lock_with_checks( 
kmp_user_lock_p lck ) { KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL ); ( *__kmp_init_user_lock_with_checks_ )( lck ); } // // We need a non-checking version of destroy lock for when the RTL is // doing the cleanup as it can't always tell if the lock is nested or not. // extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ); static inline void __kmp_destroy_user_lock( kmp_user_lock_p lck ) { KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL ); ( *__kmp_destroy_user_lock_ )( lck ); } extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ); static inline void __kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck ) { KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL ); ( *__kmp_destroy_user_lock_with_checks_ )( lck ); } extern int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) #define __kmp_acquire_nested_user_lock_with_checks(lck,gtid,depth) \ if (__kmp_user_lock_kind == lk_tas) { \ if ( __kmp_env_consistency_check ) { \ char const * const func = "omp_set_nest_lock"; \ if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) \ && lck->tas.lk.depth_locked == -1 ) { \ KMP_FATAL( LockSimpleUsedAsNestable, func ); \ } \ } \ if ( lck->tas.lk.poll - 1 == gtid ) { \ lck->tas.lk.depth_locked += 1; \ *depth = KMP_LOCK_ACQUIRED_NEXT; \ } else { \ if ( ( lck->tas.lk.poll != 0 ) || \ ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ kmp_uint32 spins; \ KMP_FSYNC_PREPARE( lck ); \ KMP_INIT_YIELD( spins ); \ if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ KMP_YIELD( TRUE ); \ } else { \ KMP_YIELD_SPIN( spins ); \ } \ while ( ( lck->tas.lk.poll != 0 ) || \ ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc) ) { \ KMP_YIELD( TRUE ); \ } else { \ KMP_YIELD_SPIN( spins ); \ } \ } \ } \ lck->tas.lk.depth_locked = 1; \ *depth = KMP_LOCK_ACQUIRED_FIRST; \ } \ KMP_FSYNC_ACQUIRED( lck ); \ } else { \ KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); \ *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); \ } #else static inline void __kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid, int* depth ) { KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); } #endif extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) static inline int __kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { if ( __kmp_user_lock_kind == lk_tas ) { int retval; if ( __kmp_env_consistency_check ) { char const * const func = "omp_test_nest_lock"; if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) && lck->tas.lk.depth_locked == -1 ) { KMP_FATAL( LockSimpleUsedAsNestable, func ); } } KMP_DEBUG_ASSERT( gtid >= 0 ); if ( lck->tas.lk.poll - 1 == gtid ) { /* __kmp_get_tas_lock_owner( lck ) == gtid */ return ++lck->tas.lk.depth_locked; /* same owner, depth increased */ } retval = ( ( lck->tas.lk.poll == 0 ) && KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ); if ( retval ) { KMP_MB(); lck->tas.lk.depth_locked = 1; } return retval; } else { KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid ); } } #else static inline int __kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid ); } #endif extern int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); static inline int __kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) { KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL ); return ( *__kmp_release_nested_user_lock_with_checks_ )( lck, gtid ); } extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck ) { KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL ); ( *__kmp_init_nested_user_lock_with_checks_ )( lck ); } extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); static inline void __kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck ) { KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL ); ( *__kmp_destroy_nested_user_lock_with_checks_ )( lck ); } // // user lock functions which do not necessarily exist for all lock kinds. // // The "set" functions usually have wrapper routines that check for a NULL set // function pointer and call it if non-NULL. // // In some cases, it makes sense to have a "get" wrapper function check for a // NULL get function pointer and return NULL / invalid value / error code if // the function pointer is NULL. // // In other cases, the calling code really should differentiate between an // unimplemented function and one that is implemented but returning NULL / // invalied value. If this is the case, no get function wrapper exists. 
//

extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck );

// no set function; fields set during local allocation

extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck );

static inline const ident_t *
__kmp_get_user_lock_location( kmp_user_lock_p lck )
{
    if ( __kmp_get_user_lock_location_ != NULL ) {
        return ( *__kmp_get_user_lock_location_ )( lck );
    }
    else {
        return NULL;
    }
}

extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc );

static inline void
__kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc )
{
    if ( __kmp_set_user_lock_location_ != NULL ) {
        ( *__kmp_set_user_lock_location_ )( lck, loc );
    }
}

extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck );

extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags );

static inline void
__kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags )
{
    if ( __kmp_set_user_lock_flags_ != NULL ) {
        ( *__kmp_set_user_lock_flags_ )( lck, flags );
    }
}

//
// The function which sets up all of the vtbl pointers for kmp_user_lock_t.
//
extern void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind );

//
// Macros for binding user lock functions.
//
#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) {                                   \
    __kmp_acquire##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
                                                  __kmp_acquire##nest##kind##_##suffix;     \
    __kmp_release##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
                                                  __kmp_release##nest##kind##_##suffix;     \
    __kmp_test##nest##user_lock_with_checks_    = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \
                                                  __kmp_test##nest##kind##_##suffix;        \
    __kmp_init##nest##user_lock_with_checks_    = ( void (*)( kmp_user_lock_p ) )           \
                                                  __kmp_init##nest##kind##_##suffix;        \
    __kmp_destroy##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) )           \
                                                  __kmp_destroy##nest##kind##_##suffix;     \
}

#define KMP_BIND_USER_LOCK(kind)                    KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind)        KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
#define KMP_BIND_NESTED_USER_LOCK(kind)             KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)

// ----------------------------------------------------------------------------
// User lock table & lock allocation
// ----------------------------------------------------------------------------
/*
    On 64-bit Linux* OS (and OS X*) the GNU compiler allocates only 4 bytes of
    memory for the lock variable, which is not enough to store a pointer, so
    we have to use lock indexes instead of pointers and maintain a lock table
    to map indexes to pointers.

    Note: The first element of the table is not a pointer to a lock! It is a
    pointer to the previously allocated table (or NULL if it is the first
    table).

    Usage:

        if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
            Lock table is fully utilized. User locks are indexes, so the table
            is used on every user lock operation.
            Note: it may be the case (lin_32) that we don't need to use a lock
            table for regular locks, but do need the table for nested locks.
        }
        else {
            Lock table initialized but not actually used.
        }
*/

struct kmp_lock_table {
    kmp_lock_index_t  used;      // Number of used elements
    kmp_lock_index_t  allocated; // Number of allocated elements
    kmp_user_lock_p * table;     // Lock table.
}; typedef struct kmp_lock_table kmp_lock_table_t; extern kmp_lock_table_t __kmp_user_lock_table; extern kmp_user_lock_p __kmp_lock_pool; struct kmp_block_of_locks { struct kmp_block_of_locks * next_block; void * locks; }; typedef struct kmp_block_of_locks kmp_block_of_locks_t; extern kmp_block_of_locks_t *__kmp_lock_blocks; extern int __kmp_num_locks_in_block; extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags ); extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck ); extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func ); extern void __kmp_cleanup_user_locks(); #define KMP_CHECK_USER_LOCK_INIT() \ { \ if ( ! TCR_4( __kmp_init_user_locks ) ) { \ __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); \ if ( ! TCR_4( __kmp_init_user_locks ) ) { \ TCW_4( __kmp_init_user_locks, TRUE ); \ } \ __kmp_release_bootstrap_lock( &__kmp_initz_lock ); \ } \ } #endif // KMP_USE_DYNAMIC_LOCK #undef KMP_PAD #undef KMP_GTID_DNE #if KMP_USE_DYNAMIC_LOCK // // KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without breaking the current // compatibility. Essential functionality of this new code is dynamic dispatch, but it also // implements (or enables implementation of) hinted user lock and critical section which will be // part of OMP 4.5 soon. // // Lock type can be decided at creation time (i.e., lock initialization), and subsequent lock // function call on the created lock object requires type extraction and call through jump table // using the extracted type. This type information is stored in two different ways depending on // the size of the lock object, and we differentiate lock types by this size requirement - direct // and indirect locks. // // Direct locks: // A direct lock object fits into the space created by the compiler for an omp_lock_t object, and // TAS/Futex lock falls into this category. We use low one byte of the lock object as the storage // for the lock type, and appropriate bit operation is required to access the data meaningful to // the lock algorithms. Also, to differentiate direct lock from indirect lock, 1 is written to LSB // of the lock object. The newly introduced "hle" lock is also a direct lock. // // Indirect locks: // An indirect lock object requires more space than the compiler-generated space, and it should be // allocated from heap. Depending on the size of the compiler-generated space for the lock (i.e., // size of omp_lock_t), this omp_lock_t object stores either the address of the heap-allocated // indirect lock (void * fits in the object) or an index to the indirect lock table entry that // holds the address. Ticket/Queuing/DRDPA/Adaptive lock falls into this category, and the newly // introduced "rtm" lock is also an indirect lock which was implemented on top of the Queuing lock. // When the omp_lock_t object holds an index (not lock address), 0 is written to LSB to // differentiate the lock from a direct lock, and the remaining part is the actual index to the // indirect lock table. // #include // for uintptr_t // Shortcuts #define KMP_USE_INLINED_TAS (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1 #define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0 // List of lock definitions; all nested locks are indirect locks. // hle lock is xchg lock prefixed with XACQUIRE/XRELEASE. // All nested locks are indirect lock types. 
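//
// Worked example of the tagging scheme described above (illustrative only;
// the concrete numeric values follow from the lock-sequence enums and macros
// defined below and may change if the lock list changes).  For a direct lock
// such as TAS, the sequence value is shifted left by one and its LSB is set
// to 1, so the tag lives entirely in the low byte of the omp_lock_t object:
//
//    KMP_GET_D_TAG(lockseq_tas) == (lockseq_tas << 1) | 1   // odd value => direct lock
//
// For an indirect lock, the omp_lock_t object instead holds either the heap
// address of the kmp_indirect_lock_t (when a pointer fits in the object) or
// a lock-table index shifted left by one with LSB == 0:
//
//    stored value == (index << 1)   // even value => indirect lock, index == value >> 1
//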
#if KMP_USE_TSX
# if KMP_USE_FUTEX
# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a)
# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
                                  m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a)         \
                                  m(nested_queuing, a) m(nested_drdpa, a)
# else
# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a)
# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \
                                  m(nested_tas, a) m(nested_ticket, a)                            \
                                  m(nested_queuing, a) m(nested_drdpa, a)
# endif // KMP_USE_FUTEX
# define KMP_LAST_D_LOCK lockseq_hle
#else
# if KMP_USE_FUTEX
# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a)
# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)                  \
                                  m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \
                                  m(nested_queuing, a) m(nested_drdpa, a)
# define KMP_LAST_D_LOCK lockseq_futex
# else
# define KMP_FOREACH_D_LOCK(m, a) m(tas, a)
# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \
                                  m(nested_tas, a) m(nested_ticket, a)   \
                                  m(nested_queuing, a) m(nested_drdpa, a)
# define KMP_LAST_D_LOCK lockseq_tas
# endif // KMP_USE_FUTEX
#endif // KMP_USE_TSX

// Information used in dynamic dispatch
#define KMP_LOCK_SHIFT   8 // number of low bits to be used as tag for direct locks
#define KMP_FIRST_D_LOCK lockseq_tas
#define KMP_FIRST_I_LOCK lockseq_ticket
#define KMP_LAST_I_LOCK  lockseq_nested_drdpa
#define KMP_NUM_I_LOCKS  (locktag_nested_drdpa+1) // number of indirect lock types

// Base type for dynamic locks.
typedef kmp_uint32 kmp_dyna_lock_t;

// Lock sequence that enumerates all lock kinds.
// Always make this enumeration consistent with kmp_lockseq_t in the include directory.
typedef enum {
    lockseq_indirect = 0,
#define expand_seq(l,a) lockseq_##l,
    KMP_FOREACH_D_LOCK(expand_seq, 0)
    KMP_FOREACH_I_LOCK(expand_seq, 0)
#undef expand_seq
} kmp_dyna_lockseq_t;

// Enumerates indirect lock tags.
typedef enum {
#define expand_tag(l,a) locktag_##l,
    KMP_FOREACH_I_LOCK(expand_tag, 0)
#undef expand_tag
} kmp_indirect_locktag_t;

// Utility macros that extract information from lock sequences.
#define KMP_IS_D_LOCK(seq) ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK)
#define KMP_IS_I_LOCK(seq) ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK)
#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq) - KMP_FIRST_I_LOCK)
#define KMP_GET_D_TAG(seq) ((seq)<<1 | 1)

// Enumerates direct lock tags starting from indirect tag.
typedef enum {
#define expand_tag(l,a) locktag_##l = KMP_GET_D_TAG(lockseq_##l),
    KMP_FOREACH_D_LOCK(expand_tag, 0)
#undef expand_tag
} kmp_direct_locktag_t;

// Indirect lock type
typedef struct {
    kmp_user_lock_p lock;
    kmp_indirect_locktag_t type;
} kmp_indirect_lock_t;

// Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking.
extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t);
extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *);
extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32);
extern int  (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32);
extern int  (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32);

// Function tables for indirect locks. Set/unset/test differentiate functions with/without consistency checking.
extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p);
extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
extern int  (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
extern int  (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);

// Extracts direct lock tag from a user lock pointer
#define KMP_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & ((1<<KMP_LOCK_SHIFT)-1) & -(*((kmp_dyna_lock_t *)(l)) & 1))

// Extracts indirect lock index from a user lock pointer
#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)

// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type).
#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]

// Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type).
#define KMP_I_LOCK_FUNC(l, op) __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]

// Initializes a direct lock with the given lock pointer and lock sequence.
#define KMP_INIT_D_LOCK(l, seq) __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)

// Initializes an indirect lock with the given lock pointer and lock sequence.
#define KMP_INIT_I_LOCK(l, seq) __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)

// Returns "free" lock value for the given lock type.
#define KMP_LOCK_FREE(type) (locktag_##type)

// Returns "busy" lock value for the given lock type.
#define KMP_LOCK_BUSY(v, type) ((v)<<KMP_LOCK_SHIFT | locktag_##type)

// Returns lock value after removing (shifting) lock tag.
#define KMP_LOCK_STRIP(v) ((v)>>KMP_LOCK_SHIFT)

// Initializes global states and data structures for managing dynamic user locks.
extern void __kmp_init_dynamic_user_locks();

// Allocates and returns an indirect lock with the given indirect lock tag.
extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);

// Cleans up global states and data structures for managing dynamic user locks.
extern void __kmp_cleanup_indirect_user_locks();

// Default user lock sequence when not using hinted locks.
extern kmp_dyna_lockseq_t __kmp_user_lock_seq;

// Jump table for "set lock location", available only for indirect locks.
extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *);
#define KMP_SET_I_LOCK_LOCATION(lck, loc) {                             \
    if (__kmp_indirect_set_location[(lck)->type] != NULL)               \
        __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc);     \
}

// Jump table for "set lock flags", available only for indirect locks.
extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t);
#define KMP_SET_I_LOCK_FLAGS(lck, flag) {                               \
    if (__kmp_indirect_set_flags[(lck)->type] != NULL)                  \
        __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag);       \
}

// Jump table for "get lock location", available only for indirect locks.
extern const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
#define KMP_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL         \
                                       ? __kmp_indirect_get_location[(lck)->type]((lck)->lock)  \
                                       : NULL )

// Jump table for "get lock flags", available only for indirect locks.
extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p);
#define KMP_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL       \
                                    ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
                                    : NULL )

#define KMP_I_LOCK_CHUNK 1024 // number of kmp_indirect_lock_t objects to be allocated together

// Lock table for indirect locks.
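//
// Illustrative index arithmetic (sketch only): with the chunked layout below,
// an indirect lock index i extracted from an omp_lock_t object refers to the
// entry
//
//    __kmp_i_lock_table.table[i / KMP_I_LOCK_CHUNK][i % KMP_I_LOCK_CHUNK]
//
// and the KMP_GET_I_LOCK() macro following the table definition yields a
// pointer to that entry via the equivalent pointer arithmetic.
//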
typedef struct kmp_indirect_lock_table { kmp_indirect_lock_t **table; // blocks of indirect locks allocated kmp_lock_index_t size; // size of the indirect lock table kmp_lock_index_t next; // index to the next lock to be allocated } kmp_indirect_lock_table_t; extern kmp_indirect_lock_table_t __kmp_i_lock_table; // Returns the indirect lock associated with the given index. #define KMP_GET_I_LOCK(index) (*(__kmp_i_lock_table.table + (index)/KMP_I_LOCK_CHUNK) + (index)%KMP_I_LOCK_CHUNK) // Number of locks in a lock block, which is fixed to "1" now. // TODO: No lock block implementation now. If we do support, we need to manage lock block data // structure for each indirect lock type. extern int __kmp_num_locks_in_block; // Fast lock table lookup without consistency checking #define KMP_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *)) \ ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \ : *((kmp_indirect_lock_t **)(l)) ) // Used once in kmp_error.c extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32); #else // KMP_USE_DYNAMIC_LOCK # define KMP_LOCK_BUSY(v, type) (v) # define KMP_LOCK_FREE(type) 0 # define KMP_LOCK_STRIP(v) (v) #endif // KMP_USE_DYNAMIC_LOCK // data structure for using backoff within spin locks. typedef struct { kmp_uint32 step; // current step kmp_uint32 max_backoff; // upper bound of outer delay loop kmp_uint32 min_tick; // size of inner delay loop in ticks (machine-dependent) } kmp_backoff_t; // Runtime's default backoff parameters extern kmp_backoff_t __kmp_spin_backoff_params; // Backoff function extern void __kmp_spin_backoff(kmp_backoff_t *); #ifdef __cplusplus } // extern "C" #endif // __cplusplus #endif /* KMP_LOCK_H */