Diffstat (limited to 'final/runtime/src/kmp_lock.h')
-rw-r--r-- | final/runtime/src/kmp_lock.h | 1275
1 file changed, 1275 insertions, 0 deletions
diff --git a/final/runtime/src/kmp_lock.h b/final/runtime/src/kmp_lock.h
new file mode 100644
index 0000000..ccd84eb
--- /dev/null
+++ b/final/runtime/src/kmp_lock.h
@@ -0,0 +1,1275 @@
+/*
+ * kmp_lock.h -- lock header file
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef KMP_LOCK_H
+#define KMP_LOCK_H
+
+#include <limits.h> // CHAR_BIT
+#include <stddef.h> // offsetof
+
+#include "kmp_debug.h"
+#include "kmp_os.h"
+
+#ifdef __cplusplus
+#include <atomic>
+
+extern "C" {
+#endif // __cplusplus
+
+// ----------------------------------------------------------------------------
+// Have to copy these definitions from kmp.h because kmp.h cannot be included
+// due to circular dependencies. Will undef these at end of file.
+
+#define KMP_PAD(type, sz) \
+  (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
+#define KMP_GTID_DNE (-2)
+
+// Forward declaration of ident and ident_t
+
+struct ident;
+typedef struct ident ident_t;
+
+// End of copied code.
+// ----------------------------------------------------------------------------
+
+// We need to know the size of the area we can assume that the compiler(s)
+// allocated for objects of type omp_lock_t and omp_nest_lock_t. The Intel
+// compiler always allocates a pointer-sized area, as does Visual Studio.
+//
+// gcc, however, only allocates 4 bytes for regular locks, even on 64-bit
+// Intel archs. It allocates at least 8 bytes for nested locks (more on
+// recent versions), but we are bounded by the pointer-sized chunks that
+// the Intel compiler allocates.
+
+#if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT)
+#define OMP_LOCK_T_SIZE sizeof(int)
+#define OMP_NEST_LOCK_T_SIZE sizeof(void *)
+#else
+#define OMP_LOCK_T_SIZE sizeof(void *)
+#define OMP_NEST_LOCK_T_SIZE sizeof(void *)
+#endif
+
+// The Intel compiler allocates a 32-byte chunk for a critical section.
+// Both gcc and Visual Studio only allocate enough space for a pointer.
+// Sometimes we know that the space was allocated by the Intel compiler.
+#define OMP_CRITICAL_SIZE sizeof(void *)
+#define INTEL_CRITICAL_SIZE 32
+
+// lock flags
+typedef kmp_uint32 kmp_lock_flags_t;
+
+#define kmp_lf_critical_section 1
+
+// When a lock table is used, the indices are of kmp_lock_index_t
+typedef kmp_uint32 kmp_lock_index_t;
+
+// When memory allocated for locks is on the lock pool (free list),
+// it is treated as structs of this type.
+struct kmp_lock_pool {
+  union kmp_user_lock *next;
+  kmp_lock_index_t index;
+};
+
+typedef struct kmp_lock_pool kmp_lock_pool_t;
+
+extern void __kmp_validate_locks(void);
+
+// ----------------------------------------------------------------------------
+// There are 5 lock implementations:
+// 1. Test and set locks.
+// 2. futex locks (Linux* OS on x86 and
+//    Intel(R) Many Integrated Core Architecture)
+// 3. Ticket (Lamport bakery) locks.
+// 4. Queuing locks (with separate spin fields).
+// 5. DRDPA (Dynamically Reconfigurable Distributed Polling Area) locks
+//
+// and 3 lock purposes:
+// 1. Bootstrap locks -- Used for a few locks available at library
+//    startup-shutdown time.
+//    These do not require non-negative global thread IDs.
+// 2. Internal RTL locks -- Used everywhere else in the RTL
+// 3.
User locks (includes critical sections) +// ---------------------------------------------------------------------------- + +// ============================================================================ +// Lock implementations. +// +// Test and set locks. +// +// Non-nested test and set locks differ from the other lock kinds (except +// futex) in that we use the memory allocated by the compiler for the lock, +// rather than a pointer to it. +// +// On lin32, lin_32e, and win_32, the space allocated may be as small as 4 +// bytes, so we have to use a lock table for nested locks, and avoid accessing +// the depth_locked field for non-nested locks. +// +// Information normally available to the tools, such as lock location, lock +// usage (normal lock vs. critical section), etc. is not available with test and +// set locks. +// ---------------------------------------------------------------------------- + +struct kmp_base_tas_lock { + // KMP_LOCK_FREE(tas) => unlocked; locked: (gtid+1) of owning thread + std::atomic<kmp_int32> poll; + kmp_int32 depth_locked; // depth locked, for nested locks only +}; + +typedef struct kmp_base_tas_lock kmp_base_tas_lock_t; + +union kmp_tas_lock { + kmp_base_tas_lock_t lk; + kmp_lock_pool_t pool; // make certain struct is large enough + double lk_align; // use worst case alignment; no cache line padding +}; + +typedef union kmp_tas_lock kmp_tas_lock_t; + +// Static initializer for test and set lock variables. Usage: +// kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock ); +#define KMP_TAS_LOCK_INITIALIZER(lock) \ + { \ + { ATOMIC_VAR_INIT(KMP_LOCK_FREE(tas)), 0 } \ + } + +extern int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); +extern int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); +extern void __kmp_init_tas_lock(kmp_tas_lock_t *lck); +extern void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck); + +extern int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); +extern int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); +extern void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck); +extern void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck); + +#define KMP_LOCK_RELEASED 1 +#define KMP_LOCK_STILL_HELD 0 +#define KMP_LOCK_ACQUIRED_FIRST 1 +#define KMP_LOCK_ACQUIRED_NEXT 0 +#ifndef KMP_USE_FUTEX +#define KMP_USE_FUTEX \ + (KMP_OS_LINUX && !KMP_OS_CNK && \ + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)) +#endif +#if KMP_USE_FUTEX + +// ---------------------------------------------------------------------------- +// futex locks. futex locks are only available on Linux* OS. +// +// Like non-nested test and set lock, non-nested futex locks use the memory +// allocated by the compiler for the lock, rather than a pointer to it. +// +// Information normally available to the tools, such as lock location, lock +// usage (normal lock vs. critical section), etc. is not available with test and +// set locks. With non-nested futex locks, the lock owner is not even available. 
+// ---------------------------------------------------------------------------- + +struct kmp_base_futex_lock { + volatile kmp_int32 poll; // KMP_LOCK_FREE(futex) => unlocked + // 2*(gtid+1) of owning thread, 0 if unlocked + // locked: (gtid+1) of owning thread + kmp_int32 depth_locked; // depth locked, for nested locks only +}; + +typedef struct kmp_base_futex_lock kmp_base_futex_lock_t; + +union kmp_futex_lock { + kmp_base_futex_lock_t lk; + kmp_lock_pool_t pool; // make certain struct is large enough + double lk_align; // use worst case alignment + // no cache line padding +}; + +typedef union kmp_futex_lock kmp_futex_lock_t; + +// Static initializer for futex lock variables. Usage: +// kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock ); +#define KMP_FUTEX_LOCK_INITIALIZER(lock) \ + { \ + { KMP_LOCK_FREE(futex), 0 } \ + } + +extern int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid); +extern int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid); +extern void __kmp_init_futex_lock(kmp_futex_lock_t *lck); +extern void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck); + +extern int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid); +extern int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, + kmp_int32 gtid); +extern void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck); +extern void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck); + +#endif // KMP_USE_FUTEX + +// ---------------------------------------------------------------------------- +// Ticket locks. + +#ifdef __cplusplus + +#ifdef _MSC_VER +// MSVC won't allow use of std::atomic<> in a union since it has non-trivial +// copy constructor. + +struct kmp_base_ticket_lock { + // `initialized' must be the first entry in the lock data structure! + std::atomic_bool initialized; + volatile union kmp_ticket_lock *self; // points to the lock union + ident_t const *location; // Source code location of omp_init_lock(). + std::atomic_uint + next_ticket; // ticket number to give to next thread which acquires + std::atomic_uint now_serving; // ticket number for thread which holds the lock + std::atomic_int owner_id; // (gtid+1) of owning thread, 0 if unlocked + std::atomic_int depth_locked; // depth locked, for nested locks only + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock +}; +#else +struct kmp_base_ticket_lock { + // `initialized' must be the first entry in the lock data structure! + std::atomic<bool> initialized; + volatile union kmp_ticket_lock *self; // points to the lock union + ident_t const *location; // Source code location of omp_init_lock(). + std::atomic<unsigned> + next_ticket; // ticket number to give to next thread which acquires + std::atomic<unsigned> + now_serving; // ticket number for thread which holds the lock + std::atomic<int> owner_id; // (gtid+1) of owning thread, 0 if unlocked + std::atomic<int> depth_locked; // depth locked, for nested locks only + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock +}; +#endif + +#else // __cplusplus + +struct kmp_base_ticket_lock; + +#endif // !__cplusplus + +typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t; + +union KMP_ALIGN_CACHE kmp_ticket_lock { + kmp_base_ticket_lock_t + lk; // This field must be first to allow static initializing. 
+ kmp_lock_pool_t pool; + double lk_align; // use worst case alignment + char lk_pad[KMP_PAD(kmp_base_ticket_lock_t, CACHE_LINE)]; +}; + +typedef union kmp_ticket_lock kmp_ticket_lock_t; + +// Static initializer for simple ticket lock variables. Usage: +// kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock ); +// Note the macro argument. It is important to make var properly initialized. +#define KMP_TICKET_LOCK_INITIALIZER(lock) \ + { \ + { \ + ATOMIC_VAR_INIT(true) \ + , &(lock), NULL, ATOMIC_VAR_INIT(0U), ATOMIC_VAR_INIT(0U), \ + ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(-1) \ + } \ + } + +extern int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_ticket_lock_with_cheks(kmp_ticket_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid); +extern void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck); +extern void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck); + +extern int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, + kmp_int32 gtid); +extern void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck); +extern void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck); + +// ---------------------------------------------------------------------------- +// Queuing locks. + +#if KMP_USE_ADAPTIVE_LOCKS + +struct kmp_adaptive_lock_info; + +typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t; + +#if KMP_DEBUG_ADAPTIVE_LOCKS + +struct kmp_adaptive_lock_statistics { + /* So we can get stats from locks that haven't been destroyed. */ + kmp_adaptive_lock_info_t *next; + kmp_adaptive_lock_info_t *prev; + + /* Other statistics */ + kmp_uint32 successfulSpeculations; + kmp_uint32 hardFailedSpeculations; + kmp_uint32 softFailedSpeculations; + kmp_uint32 nonSpeculativeAcquires; + kmp_uint32 nonSpeculativeAcquireAttempts; + kmp_uint32 lemmingYields; +}; + +typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t; + +extern void __kmp_print_speculative_stats(); +extern void __kmp_init_speculative_stats(); + +#endif // KMP_DEBUG_ADAPTIVE_LOCKS + +struct kmp_adaptive_lock_info { + /* Values used for adaptivity. + Although these are accessed from multiple threads we don't access them + atomically, because if we miss updates it probably doesn't matter much. (It + just affects our decision about whether to try speculation on the lock). */ + kmp_uint32 volatile badness; + kmp_uint32 volatile acquire_attempts; + /* Parameters of the lock. */ + kmp_uint32 max_badness; + kmp_uint32 max_soft_retries; + +#if KMP_DEBUG_ADAPTIVE_LOCKS + kmp_adaptive_lock_statistics_t volatile stats; +#endif +}; + +#endif // KMP_USE_ADAPTIVE_LOCKS + +struct kmp_base_queuing_lock { + + // `initialized' must be the first entry in the lock data structure! + volatile union kmp_queuing_lock + *initialized; // Points to the lock union if in initialized state. + + ident_t const *location; // Source code location of omp_init_lock(). + + KMP_ALIGN(8) // tail_id must be 8-byte aligned! 
+ + volatile kmp_int32 + tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty + // Must be no padding here since head/tail used in 8-byte CAS + volatile kmp_int32 + head_id; // (gtid+1) of thread at head of wait queue, 0 if empty + // Decl order assumes little endian + // bakery-style lock + volatile kmp_uint32 + next_ticket; // ticket number to give to next thread which acquires + volatile kmp_uint32 + now_serving; // ticket number for thread which holds the lock + volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked + kmp_int32 depth_locked; // depth locked, for nested locks only + + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock +}; + +typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t; + +KMP_BUILD_ASSERT(offsetof(kmp_base_queuing_lock_t, tail_id) % 8 == 0); + +union KMP_ALIGN_CACHE kmp_queuing_lock { + kmp_base_queuing_lock_t + lk; // This field must be first to allow static initializing. + kmp_lock_pool_t pool; + double lk_align; // use worst case alignment + char lk_pad[KMP_PAD(kmp_base_queuing_lock_t, CACHE_LINE)]; +}; + +typedef union kmp_queuing_lock kmp_queuing_lock_t; + +extern int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid); +extern int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid); +extern void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck); +extern void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck); + +extern int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, + kmp_int32 gtid); +extern void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck); +extern void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck); + +#if KMP_USE_ADAPTIVE_LOCKS + +// ---------------------------------------------------------------------------- +// Adaptive locks. +struct kmp_base_adaptive_lock { + kmp_base_queuing_lock qlk; + KMP_ALIGN(CACHE_LINE) + kmp_adaptive_lock_info_t + adaptive; // Information for the speculative adaptive lock +}; + +typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t; + +union KMP_ALIGN_CACHE kmp_adaptive_lock { + kmp_base_adaptive_lock_t lk; + kmp_lock_pool_t pool; + double lk_align; + char lk_pad[KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE)]; +}; +typedef union kmp_adaptive_lock kmp_adaptive_lock_t; + +#define GET_QLK_PTR(l) ((kmp_queuing_lock_t *)&(l)->lk.qlk) + +#endif // KMP_USE_ADAPTIVE_LOCKS + +// ---------------------------------------------------------------------------- +// DRDPA ticket locks. +struct kmp_base_drdpa_lock { + // All of the fields on the first cache line are only written when + // initializing or reconfiguring the lock. These are relatively rare + // operations, so data from the first cache line will usually stay resident in + // the cache of each thread trying to acquire the lock. + // + // initialized must be the first entry in the lock data structure! + KMP_ALIGN_CACHE + + volatile union kmp_drdpa_lock + *initialized; // points to the lock union if in initialized state + ident_t const *location; // Source code location of omp_init_lock(). 
+  std::atomic<std::atomic<kmp_uint64> *> polls;
+  std::atomic<kmp_uint64> mask; // is 2**num_polls-1 for mod op
+  kmp_uint64 cleanup_ticket; // thread with cleanup ticket
+  std::atomic<kmp_uint64> *old_polls; // will deallocate old_polls
+  kmp_uint32 num_polls; // must be power of 2
+
+  // next_ticket needs to exist in a separate cache line, as it is
+  // invalidated every time a thread takes a new ticket.
+  KMP_ALIGN_CACHE
+
+  std::atomic<kmp_uint64> next_ticket;
+
+  // now_serving is used to store our ticket value while we hold the lock. It
+  // has a slightly different meaning in the DRDPA ticket locks (where it is
+  // written by the acquiring thread) than it does in the simple ticket locks
+  // (where it is written by the releasing thread).
+  //
+  // Since now_serving is only read and written in the critical section,
+  // it is non-volatile, but it needs to exist on a separate cache line,
+  // as it is invalidated at every lock acquire.
+  //
+  // Likewise, the vars used for nested locks (owner_id and depth_locked) are
+  // only written by the thread owning the lock, so they are put in this cache
+  // line. owner_id is read by other threads, so it must be declared volatile.
+  KMP_ALIGN_CACHE
+  kmp_uint64 now_serving; // doesn't have to be volatile
+  volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked
+  kmp_int32 depth_locked; // depth locked
+  kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock
+};
+
+typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t;
+
+union KMP_ALIGN_CACHE kmp_drdpa_lock {
+  kmp_base_drdpa_lock_t
+      lk; // This field must be first to allow static initializing.
+  kmp_lock_pool_t pool;
+  double lk_align; // use worst case alignment
+  char lk_pad[KMP_PAD(kmp_base_drdpa_lock_t, CACHE_LINE)];
+};
+
+typedef union kmp_drdpa_lock kmp_drdpa_lock_t;
+
+extern int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
+extern int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
+extern int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
+extern void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck);
+extern void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck);
+
+extern int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck,
+                                           kmp_int32 gtid);
+extern int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid);
+extern int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck,
+                                           kmp_int32 gtid);
+extern void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck);
+extern void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck);
+
+// ============================================================================
+// Lock purposes.
+// ============================================================================
+
+// Bootstrap locks.
+//
+// Bootstrap locks -- very few locks used at library initialization time.
+// Bootstrap locks are currently implemented as ticket locks.
+// They could also be implemented as test and set locks, but cannot be
+// implemented with other lock kinds as they require gtids which are not
+// available at initialization time.
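As a usage sketch of the bootstrap lock API declared just below (the lock and function names here are invented for illustration, not part of the runtime): a file-scope bootstrap lock guards one-time setup that runs before gtids exist, which is why the wrappers below pass KMP_GTID_DNE.

static kmp_bootstrap_lock_t example_setup_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(example_setup_lock);

static void example_one_time_setup(void) {
  __kmp_acquire_bootstrap_lock(&example_setup_lock); // acquires as KMP_GTID_DNE
  /* ... idempotent initialization goes here ... */
  __kmp_release_bootstrap_lock(&example_setup_lock);
}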
+ +typedef kmp_ticket_lock_t kmp_bootstrap_lock_t; + +#define KMP_BOOTSTRAP_LOCK_INITIALIZER(lock) KMP_TICKET_LOCK_INITIALIZER((lock)) +#define KMP_BOOTSTRAP_LOCK_INIT(lock) \ + kmp_bootstrap_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock) + +static inline int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck) { + return __kmp_acquire_ticket_lock(lck, KMP_GTID_DNE); +} + +static inline int __kmp_test_bootstrap_lock(kmp_bootstrap_lock_t *lck) { + return __kmp_test_ticket_lock(lck, KMP_GTID_DNE); +} + +static inline void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck) { + __kmp_release_ticket_lock(lck, KMP_GTID_DNE); +} + +static inline void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck) { + __kmp_init_ticket_lock(lck); +} + +static inline void __kmp_destroy_bootstrap_lock(kmp_bootstrap_lock_t *lck) { + __kmp_destroy_ticket_lock(lck); +} + +// Internal RTL locks. +// +// Internal RTL locks are also implemented as ticket locks, for now. +// +// FIXME - We should go through and figure out which lock kind works best for +// each internal lock, and use the type declaration and function calls for +// that explicit lock kind (and get rid of this section). + +typedef kmp_ticket_lock_t kmp_lock_t; + +#define KMP_LOCK_INIT(lock) kmp_lock_t lock = KMP_TICKET_LOCK_INITIALIZER(lock) + +static inline int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid) { + return __kmp_acquire_ticket_lock(lck, gtid); +} + +static inline int __kmp_test_lock(kmp_lock_t *lck, kmp_int32 gtid) { + return __kmp_test_ticket_lock(lck, gtid); +} + +static inline void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid) { + __kmp_release_ticket_lock(lck, gtid); +} + +static inline void __kmp_init_lock(kmp_lock_t *lck) { + __kmp_init_ticket_lock(lck); +} + +static inline void __kmp_destroy_lock(kmp_lock_t *lck) { + __kmp_destroy_ticket_lock(lck); +} + +// User locks. +// +// Do not allocate objects of type union kmp_user_lock!!! This will waste space +// unless __kmp_user_lock_kind == lk_drdpa. Instead, check the value of +// __kmp_user_lock_kind and allocate objects of the type of the appropriate +// union member, and cast their addresses to kmp_user_lock_p. 
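A minimal sketch of the sizing rule described above, assuming the kind enumeration declared next; the helper name example_user_lock_size() is invented for this illustration. The point is to size the allocation from the active union member rather than from union kmp_user_lock itself, then cast the result to kmp_user_lock_p.

static size_t example_user_lock_size(kmp_lock_kind_t kind) {
  switch (kind) {
  case lk_tas:
    return sizeof(kmp_tas_lock_t);
#if KMP_USE_FUTEX
  case lk_futex:
    return sizeof(kmp_futex_lock_t);
#endif
  case lk_ticket:
    return sizeof(kmp_ticket_lock_t);
  case lk_queuing:
    return sizeof(kmp_queuing_lock_t);
  default:
    return sizeof(kmp_drdpa_lock_t); // largest member; the worst case
  }
}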
+ +enum kmp_lock_kind { + lk_default = 0, + lk_tas, +#if KMP_USE_FUTEX + lk_futex, +#endif +#if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX + lk_hle, + lk_rtm, +#endif + lk_ticket, + lk_queuing, + lk_drdpa, +#if KMP_USE_ADAPTIVE_LOCKS + lk_adaptive +#endif // KMP_USE_ADAPTIVE_LOCKS +}; + +typedef enum kmp_lock_kind kmp_lock_kind_t; + +extern kmp_lock_kind_t __kmp_user_lock_kind; + +union kmp_user_lock { + kmp_tas_lock_t tas; +#if KMP_USE_FUTEX + kmp_futex_lock_t futex; +#endif + kmp_ticket_lock_t ticket; + kmp_queuing_lock_t queuing; + kmp_drdpa_lock_t drdpa; +#if KMP_USE_ADAPTIVE_LOCKS + kmp_adaptive_lock_t adaptive; +#endif // KMP_USE_ADAPTIVE_LOCKS + kmp_lock_pool_t pool; +}; + +typedef union kmp_user_lock *kmp_user_lock_p; + +#if !KMP_USE_DYNAMIC_LOCK + +extern size_t __kmp_base_user_lock_size; +extern size_t __kmp_user_lock_size; + +extern kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck); + +static inline kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck) { + KMP_DEBUG_ASSERT(__kmp_get_user_lock_owner_ != NULL); + return (*__kmp_get_user_lock_owner_)(lck); +} + +extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid); + +#if KMP_OS_LINUX && \ + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +#define __kmp_acquire_user_lock_with_checks(lck, gtid) \ + if (__kmp_user_lock_kind == lk_tas) { \ + if (__kmp_env_consistency_check) { \ + char const *const func = "omp_set_lock"; \ + if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && \ + lck->tas.lk.depth_locked != -1) { \ + KMP_FATAL(LockNestableUsedAsSimple, func); \ + } \ + if ((gtid >= 0) && (lck->tas.lk.poll - 1 == gtid)) { \ + KMP_FATAL(LockIsAlreadyOwned, func); \ + } \ + } \ + if (lck->tas.lk.poll != 0 || \ + !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \ + kmp_uint32 spins; \ + KMP_FSYNC_PREPARE(lck); \ + KMP_INIT_YIELD(spins); \ + do { \ + KMP_YIELD_OVERSUB_ELSE_SPIN(spins); \ + } while ( \ + lck->tas.lk.poll != 0 || \ + !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \ + } \ + KMP_FSYNC_ACQUIRED(lck); \ + } else { \ + KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL); \ + (*__kmp_acquire_user_lock_with_checks_)(lck, gtid); \ + } + +#else +static inline int __kmp_acquire_user_lock_with_checks(kmp_user_lock_p lck, + kmp_int32 gtid) { + KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL); + return (*__kmp_acquire_user_lock_with_checks_)(lck, gtid); +} +#endif + +extern int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid); + +#if KMP_OS_LINUX && \ + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +#include "kmp_i18n.h" /* AC: KMP_FATAL definition */ +extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */ +static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck, + kmp_int32 gtid) { + if (__kmp_user_lock_kind == lk_tas) { + if (__kmp_env_consistency_check) { + char const *const func = "omp_test_lock"; + if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && + lck->tas.lk.depth_locked != -1) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + } + return ((lck->tas.lk.poll == 0) && + __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); + } else { + KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL); + return (*__kmp_test_user_lock_with_checks_)(lck, gtid); + } +} +#else +static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck, + kmp_int32 gtid) { + KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ 
!= NULL); + return (*__kmp_test_user_lock_with_checks_)(lck, gtid); +} +#endif + +extern int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid); + +static inline void __kmp_release_user_lock_with_checks(kmp_user_lock_p lck, + kmp_int32 gtid) { + KMP_DEBUG_ASSERT(__kmp_release_user_lock_with_checks_ != NULL); + (*__kmp_release_user_lock_with_checks_)(lck, gtid); +} + +extern void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck); + +static inline void __kmp_init_user_lock_with_checks(kmp_user_lock_p lck) { + KMP_DEBUG_ASSERT(__kmp_init_user_lock_with_checks_ != NULL); + (*__kmp_init_user_lock_with_checks_)(lck); +} + +// We need a non-checking version of destroy lock for when the RTL is +// doing the cleanup as it can't always tell if the lock is nested or not. +extern void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck); + +static inline void __kmp_destroy_user_lock(kmp_user_lock_p lck) { + KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_ != NULL); + (*__kmp_destroy_user_lock_)(lck); +} + +extern void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck); + +static inline void __kmp_destroy_user_lock_with_checks(kmp_user_lock_p lck) { + KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_with_checks_ != NULL); + (*__kmp_destroy_user_lock_with_checks_)(lck); +} + +extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid); + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) + +#define __kmp_acquire_nested_user_lock_with_checks(lck, gtid, depth) \ + if (__kmp_user_lock_kind == lk_tas) { \ + if (__kmp_env_consistency_check) { \ + char const *const func = "omp_set_nest_lock"; \ + if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZE) && \ + lck->tas.lk.depth_locked == -1) { \ + KMP_FATAL(LockSimpleUsedAsNestable, func); \ + } \ + } \ + if (lck->tas.lk.poll - 1 == gtid) { \ + lck->tas.lk.depth_locked += 1; \ + *depth = KMP_LOCK_ACQUIRED_NEXT; \ + } else { \ + if ((lck->tas.lk.poll != 0) || \ + !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \ + kmp_uint32 spins; \ + KMP_FSYNC_PREPARE(lck); \ + KMP_INIT_YIELD(spins); \ + do { \ + KMP_YIELD_OVERSUB_ELSE_SPIN(spins); \ + } while ( \ + (lck->tas.lk.poll != 0) || \ + !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); \ + } \ + lck->tas.lk.depth_locked = 1; \ + *depth = KMP_LOCK_ACQUIRED_FIRST; \ + } \ + KMP_FSYNC_ACQUIRED(lck); \ + } else { \ + KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL); \ + *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid); \ + } + +#else +static inline void +__kmp_acquire_nested_user_lock_with_checks(kmp_user_lock_p lck, kmp_int32 gtid, + int *depth) { + KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL); + *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid); +} +#endif + +extern int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid); + +#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck, + kmp_int32 gtid) { + if (__kmp_user_lock_kind == lk_tas) { + int retval; + if (__kmp_env_consistency_check) { + char const *const func = "omp_test_nest_lock"; + if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZE) && + lck->tas.lk.depth_locked == -1) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + } + KMP_DEBUG_ASSERT(gtid >= 0); + if (lck->tas.lk.poll - 1 == + gtid) { /* __kmp_get_tas_lock_owner( lck ) == gtid */ + return ++lck->tas.lk.depth_locked; /* 
same owner, depth increased */
+    }
+    retval = ((lck->tas.lk.poll == 0) &&
+              __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
+    if (retval) {
+      KMP_MB();
+      lck->tas.lk.depth_locked = 1;
+    }
+    return retval;
+  } else {
+    KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL);
+    return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
+  }
+}
+#else
+static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck,
+                                                          kmp_int32 gtid) {
+  KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL);
+  return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid);
+}
+#endif
+
+extern int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
+                                                          kmp_int32 gtid);
+
+static inline int
+__kmp_release_nested_user_lock_with_checks(kmp_user_lock_p lck,
+                                           kmp_int32 gtid) {
+  KMP_DEBUG_ASSERT(__kmp_release_nested_user_lock_with_checks_ != NULL);
+  return (*__kmp_release_nested_user_lock_with_checks_)(lck, gtid);
+}
+
+extern void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck);
+
+static inline void
+__kmp_init_nested_user_lock_with_checks(kmp_user_lock_p lck) {
+  KMP_DEBUG_ASSERT(__kmp_init_nested_user_lock_with_checks_ != NULL);
+  (*__kmp_init_nested_user_lock_with_checks_)(lck);
+}
+
+extern void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck);
+
+static inline void
+__kmp_destroy_nested_user_lock_with_checks(kmp_user_lock_p lck) {
+  KMP_DEBUG_ASSERT(__kmp_destroy_nested_user_lock_with_checks_ != NULL);
+  (*__kmp_destroy_nested_user_lock_with_checks_)(lck);
+}
+
+// user lock functions which do not necessarily exist for all lock kinds.
+//
+// The "set" functions usually have wrapper routines that check for a NULL set
+// function pointer and call it if non-NULL.
+//
+// In some cases, it makes sense to have a "get" wrapper function check for a
+// NULL get function pointer and return NULL / invalid value / error code if
+// the function pointer is NULL.
+//
+// In other cases, the calling code really should differentiate between an
+// unimplemented function and one that is implemented but returning NULL /
+// invalid value. If this is the case, no get function wrapper exists.
+
+extern int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck);
+
+// no set function; fields set during local allocation
+
+extern const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck);
+
+static inline const ident_t *__kmp_get_user_lock_location(kmp_user_lock_p lck) {
+  if (__kmp_get_user_lock_location_ != NULL) {
+    return (*__kmp_get_user_lock_location_)(lck);
+  } else {
+    return NULL;
+  }
+}
+
+extern void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
+                                             const ident_t *loc);
+
+static inline void __kmp_set_user_lock_location(kmp_user_lock_p lck,
+                                                const ident_t *loc) {
+  if (__kmp_set_user_lock_location_ != NULL) {
+    (*__kmp_set_user_lock_location_)(lck, loc);
+  }
+}
+
+extern kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck);
+
+extern void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
+                                          kmp_lock_flags_t flags);
+
+static inline void __kmp_set_user_lock_flags(kmp_user_lock_p lck,
+                                             kmp_lock_flags_t flags) {
+  if (__kmp_set_user_lock_flags_ != NULL) {
+    (*__kmp_set_user_lock_flags_)(lck, flags);
+  }
+}
+
+// The function which sets up all of the vtbl pointers for kmp_user_lock_t.
+extern void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind);
+
+// Macros for binding user lock functions.
+#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) \
+  { \
+    __kmp_acquire##nest##user_lock_with_checks_ = (int (*)( \
+        kmp_user_lock_p, kmp_int32))__kmp_acquire##nest##kind##_##suffix; \
+    __kmp_release##nest##user_lock_with_checks_ = (int (*)( \
+        kmp_user_lock_p, kmp_int32))__kmp_release##nest##kind##_##suffix; \
+    __kmp_test##nest##user_lock_with_checks_ = (int (*)( \
+        kmp_user_lock_p, kmp_int32))__kmp_test##nest##kind##_##suffix; \
+    __kmp_init##nest##user_lock_with_checks_ = \
+        (void (*)(kmp_user_lock_p))__kmp_init##nest##kind##_##suffix; \
+    __kmp_destroy##nest##user_lock_with_checks_ = \
+        (void (*)(kmp_user_lock_p))__kmp_destroy##nest##kind##_##suffix; \
+  }
+
+#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock)
+#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) \
+  KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks)
+#define KMP_BIND_NESTED_USER_LOCK(kind) \
+  KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock)
+#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) \
+  KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks)
+
+// User lock table & lock allocation
+/* On 64-bit Linux* OS (and OS X*) the GNU compiler allocates only 4 bytes of
+   memory for the lock variable, which is not enough to store a pointer, so
+   we have to use lock indexes instead of pointers and maintain a lock table
+   to map indexes to pointers.
+
+   Note: The first element of the table is not a pointer to a lock! It is a
+   pointer to the previously allocated table (or NULL if it is the first
+   table).
+
+   Usage:
+
+   if ( OMP_LOCK_T_SIZE < sizeof( <lock> ) ) { // or OMP_NEST_LOCK_T_SIZE
+     Lock table is fully utilized. User locks are indexes, so table is used on
+     user lock operation.
+     Note: it may be the case (lin_32) that we don't need to use a lock
+     table for regular locks, but do need the table for nested locks.
+   }
+   else {
+     Lock table initialized but not actually used.
+   }
+*/
+
+struct kmp_lock_table {
+  kmp_lock_index_t used; // Number of used elements
+  kmp_lock_index_t allocated; // Number of allocated elements
+  kmp_user_lock_p *table; // Lock table.
+};
+
+typedef struct kmp_lock_table kmp_lock_table_t;
+
+extern kmp_lock_table_t __kmp_user_lock_table;
+extern kmp_user_lock_p __kmp_lock_pool;
+
+struct kmp_block_of_locks {
+  struct kmp_block_of_locks *next_block;
+  void *locks;
+};
+
+typedef struct kmp_block_of_locks kmp_block_of_locks_t;
+
+extern kmp_block_of_locks_t *__kmp_lock_blocks;
+extern int __kmp_num_locks_in_block;
+
+extern kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock,
+                                                kmp_int32 gtid,
+                                                kmp_lock_flags_t flags);
+extern void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
+                                 kmp_user_lock_p lck);
+extern kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock,
+                                              char const *func);
+extern void __kmp_cleanup_user_locks();
+
+#define KMP_CHECK_USER_LOCK_INIT() \
+  { \
+    if (!TCR_4(__kmp_init_user_locks)) { \
+      __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); \
+      if (!TCR_4(__kmp_init_user_locks)) { \
+        TCW_4(__kmp_init_user_locks, TRUE); \
+      } \
+      __kmp_release_bootstrap_lock(&__kmp_initz_lock); \
+    } \
+  }
+
+#endif // KMP_USE_DYNAMIC_LOCK
+
+#undef KMP_PAD
+#undef KMP_GTID_DNE
+
+#if KMP_USE_DYNAMIC_LOCK
+// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without
+// breaking the current compatibility. Essential functionality of this new code
+// is dynamic dispatch, but it also implements (or enables implementation of)
+// hinted user lock and critical section which will be part of OMP 4.5 soon.
+// +// Lock type can be decided at creation time (i.e., lock initialization), and +// subsequent lock function call on the created lock object requires type +// extraction and call through jump table using the extracted type. This type +// information is stored in two different ways depending on the size of the lock +// object, and we differentiate lock types by this size requirement - direct and +// indirect locks. +// +// Direct locks: +// A direct lock object fits into the space created by the compiler for an +// omp_lock_t object, and TAS/Futex lock falls into this category. We use low +// one byte of the lock object as the storage for the lock type, and appropriate +// bit operation is required to access the data meaningful to the lock +// algorithms. Also, to differentiate direct lock from indirect lock, 1 is +// written to LSB of the lock object. The newly introduced "hle" lock is also a +// direct lock. +// +// Indirect locks: +// An indirect lock object requires more space than the compiler-generated +// space, and it should be allocated from heap. Depending on the size of the +// compiler-generated space for the lock (i.e., size of omp_lock_t), this +// omp_lock_t object stores either the address of the heap-allocated indirect +// lock (void * fits in the object) or an index to the indirect lock table entry +// that holds the address. Ticket/Queuing/DRDPA/Adaptive lock falls into this +// category, and the newly introduced "rtm" lock is also an indirect lock which +// was implemented on top of the Queuing lock. When the omp_lock_t object holds +// an index (not lock address), 0 is written to LSB to differentiate the lock +// from a direct lock, and the remaining part is the actual index to the +// indirect lock table. + +#include <stdint.h> // for uintptr_t + +// Shortcuts +#define KMP_USE_INLINED_TAS \ + (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1 +#define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0 + +// List of lock definitions; all nested locks are indirect locks. +// hle lock is xchg lock prefixed with XACQUIRE/XRELEASE. +// All nested locks are indirect lock types. 
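To make the direct/indirect encoding above concrete, here is a worked example (illustrative only, not part of the header) in terms of KMP_LOCK_SHIFT, the locktag_* enumerators, and the KMP_LOCK_* / KMP_EXTRACT_* macros that appear later in this section. Since lockseq_tas is always the first sequence after lockseq_indirect == 0, lockseq_tas == 1 and locktag_tas == (1 << 1) | 1 == 3:

// With KMP_LOCK_SHIFT == 8 and locktag_tas == 3:
//   KMP_LOCK_FREE(tas)       == 3             // unlocked word; LSB == 1
//   KMP_LOCK_BUSY(6, tas)    == (6 << 8) | 3  // 0x603, lock held by gtid 5
//   KMP_LOCK_STRIP(0x603)    == 6             // recovers gtid + 1
//   KMP_EXTRACT_D_TAG(&word) == 3             // odd word => direct lock
// An indirect handle stores (index << 1) instead, so its LSB is 0,
// KMP_EXTRACT_D_TAG yields 0, and KMP_EXTRACT_I_INDEX recovers the index.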
+#if KMP_USE_TSX +#if KMP_USE_FUTEX +#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) +#define KMP_FOREACH_I_LOCK(m, a) \ + m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ + m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +#else +#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) +#define KMP_FOREACH_I_LOCK(m, a) \ + m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ + m(nested_tas, a) m(nested_ticket, a) m(nested_queuing, a) \ + m(nested_drdpa, a) +#endif // KMP_USE_FUTEX +#define KMP_LAST_D_LOCK lockseq_hle +#else +#if KMP_USE_FUTEX +#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) +#define KMP_FOREACH_I_LOCK(m, a) \ + m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_futex, a) \ + m(nested_ticket, a) m(nested_queuing, a) m(nested_drdpa, a) +#define KMP_LAST_D_LOCK lockseq_futex +#else +#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) +#define KMP_FOREACH_I_LOCK(m, a) \ + m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +#define KMP_LAST_D_LOCK lockseq_tas +#endif // KMP_USE_FUTEX +#endif // KMP_USE_TSX + +// Information used in dynamic dispatch +#define KMP_LOCK_SHIFT \ + 8 // number of low bits to be used as tag for direct locks +#define KMP_FIRST_D_LOCK lockseq_tas +#define KMP_FIRST_I_LOCK lockseq_ticket +#define KMP_LAST_I_LOCK lockseq_nested_drdpa +#define KMP_NUM_I_LOCKS \ + (locktag_nested_drdpa + 1) // number of indirect lock types + +// Base type for dynamic locks. +typedef kmp_uint32 kmp_dyna_lock_t; + +// Lock sequence that enumerates all lock kinds. Always make this enumeration +// consistent with kmp_lockseq_t in the include directory. +typedef enum { + lockseq_indirect = 0, +#define expand_seq(l, a) lockseq_##l, + KMP_FOREACH_D_LOCK(expand_seq, 0) KMP_FOREACH_I_LOCK(expand_seq, 0) +#undef expand_seq +} kmp_dyna_lockseq_t; + +// Enumerates indirect lock tags. +typedef enum { +#define expand_tag(l, a) locktag_##l, + KMP_FOREACH_I_LOCK(expand_tag, 0) +#undef expand_tag +} kmp_indirect_locktag_t; + +// Utility macros that extract information from lock sequences. +#define KMP_IS_D_LOCK(seq) \ + ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK) +#define KMP_IS_I_LOCK(seq) \ + ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK) +#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq)-KMP_FIRST_I_LOCK) +#define KMP_GET_D_TAG(seq) ((seq) << 1 | 1) + +// Enumerates direct lock tags starting from indirect tag. +typedef enum { +#define expand_tag(l, a) locktag_##l = KMP_GET_D_TAG(lockseq_##l), + KMP_FOREACH_D_LOCK(expand_tag, 0) +#undef expand_tag +} kmp_direct_locktag_t; + +// Indirect lock type +typedef struct { + kmp_user_lock_p lock; + kmp_indirect_locktag_t type; +} kmp_indirect_lock_t; + +// Function tables for direct locks. Set/unset/test differentiate functions +// with/without consistency checking. +extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t); +extern void (*(*__kmp_direct_destroy))(kmp_dyna_lock_t *); +extern int (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32); +extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32); +extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32); + +// Function tables for indirect locks. Set/unset/test differentiate functions +// with/withuot consistency checking. 
+extern void (*__kmp_indirect_init[])(kmp_user_lock_p);
+extern void (*(*__kmp_indirect_destroy))(kmp_user_lock_p);
+extern int (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32);
+extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32);
+extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32);
+
+// Extracts direct lock tag from a user lock pointer
+#define KMP_EXTRACT_D_TAG(l) \
+  (*((kmp_dyna_lock_t *)(l)) & ((1 << KMP_LOCK_SHIFT) - 1) & \
+   -(*((kmp_dyna_lock_t *)(l)) & 1))
+
+// Extracts indirect lock index from a user lock pointer
+#define KMP_EXTRACT_I_INDEX(l) (*(kmp_lock_index_t *)(l) >> 1)
+
+// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t
+// *) and op (operation type).
+#define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)]
+
+// Returns function pointer to the indirect lock function with l
+// (kmp_indirect_lock_t *) and op (operation type).
+#define KMP_I_LOCK_FUNC(l, op) \
+  __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type]
+
+// Initializes a direct lock with the given lock pointer and lock sequence.
+#define KMP_INIT_D_LOCK(l, seq) \
+  __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq)
+
+// Initializes an indirect lock with the given lock pointer and lock sequence.
+#define KMP_INIT_I_LOCK(l, seq) \
+  __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq)
+
+// Returns "free" lock value for the given lock type.
+#define KMP_LOCK_FREE(type) (locktag_##type)
+
+// Returns "busy" lock value for the given lock type.
+#define KMP_LOCK_BUSY(v, type) ((v) << KMP_LOCK_SHIFT | locktag_##type)
+
+// Returns lock value after removing (shifting) lock tag.
+#define KMP_LOCK_STRIP(v) ((v) >> KMP_LOCK_SHIFT)
+
+// Initializes global states and data structures for managing dynamic user
+// locks.
+extern void __kmp_init_dynamic_user_locks();
+
+// Allocates and returns an indirect lock with the given indirect lock tag.
+extern kmp_indirect_lock_t *
+__kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t);
+
+// Cleans up global states and data structures for managing dynamic user locks.
+extern void __kmp_cleanup_indirect_user_locks();
+
+// Default user lock sequence when not using hinted locks.
+extern kmp_dyna_lockseq_t __kmp_user_lock_seq;
+
+// Jump table for "set lock location", available only for indirect locks.
+extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
+                                                            const ident_t *);
+#define KMP_SET_I_LOCK_LOCATION(lck, loc) \
+  { \
+    if (__kmp_indirect_set_location[(lck)->type] != NULL) \
+      __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \
+  }
+
+// Jump table for "set lock flags", available only for indirect locks.
+extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
+                                                         kmp_lock_flags_t);
+#define KMP_SET_I_LOCK_FLAGS(lck, flag) \
+  { \
+    if (__kmp_indirect_set_flags[(lck)->type] != NULL) \
+      __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \
+  }
+
+// Jump table for "get lock location", available only for indirect locks.
+extern const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
+    kmp_user_lock_p);
+#define KMP_GET_I_LOCK_LOCATION(lck) \
+  (__kmp_indirect_get_location[(lck)->type] != NULL \
+       ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \
+       : NULL)
+
+// Jump table for "get lock flags", available only for indirect locks.
+extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
+    kmp_user_lock_p);
+#define KMP_GET_I_LOCK_FLAGS(lck) \
+  (__kmp_indirect_get_flags[(lck)->type] != NULL \
+       ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \
+       : NULL)
+
+#define KMP_I_LOCK_CHUNK \
+  1024 // number of kmp_indirect_lock_t objects to be allocated together
+
+// Lock table for indirect locks.
+typedef struct kmp_indirect_lock_table {
+  kmp_indirect_lock_t **table; // blocks of indirect locks allocated
+  kmp_lock_index_t size; // size of the indirect lock table
+  kmp_lock_index_t next; // index to the next lock to be allocated
+} kmp_indirect_lock_table_t;
+
+extern kmp_indirect_lock_table_t __kmp_i_lock_table;
+
+// Returns the indirect lock associated with the given index.
+#define KMP_GET_I_LOCK(index) \
+  (*(__kmp_i_lock_table.table + (index) / KMP_I_LOCK_CHUNK) + \
+   (index) % KMP_I_LOCK_CHUNK)
+
+// Number of locks in a lock block, which is fixed to "1" now.
+// TODO: No lock block implementation now. If we do support it, we need to
+// manage lock block data structure for each indirect lock type.
+extern int __kmp_num_locks_in_block;
+
+// Fast lock table lookup without consistency checking
+#define KMP_LOOKUP_I_LOCK(l) \
+  ((OMP_LOCK_T_SIZE < sizeof(void *)) ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \
+                                      : *((kmp_indirect_lock_t **)(l)))
+
+// Used once in kmp_error.cpp
+extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
+
+#else // KMP_USE_DYNAMIC_LOCK
+
+#define KMP_LOCK_BUSY(v, type) (v)
+#define KMP_LOCK_FREE(type) 0
+#define KMP_LOCK_STRIP(v) (v)
+
+#endif // KMP_USE_DYNAMIC_LOCK
+
+// data structure for using backoff within spin locks.
+typedef struct {
+  kmp_uint32 step; // current step
+  kmp_uint32 max_backoff; // upper bound of outer delay loop
+  kmp_uint32 min_tick; // size of inner delay loop in ticks (machine-dependent)
+} kmp_backoff_t;
+
+// Runtime's default backoff parameters
+extern kmp_backoff_t __kmp_spin_backoff_params;
+
+// Backoff function
+extern void __kmp_spin_backoff(kmp_backoff_t *);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+
+#endif /* KMP_LOCK_H */
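The kmp_backoff_t fields above suggest a truncated exponential backoff: spin for roughly `step` delay units, then widen the window until it saturates at max_backoff. A minimal sketch of that policy follows (an assumed shape based on the field comments, not the runtime's actual __kmp_spin_backoff implementation; the function name and delay loop body are invented for illustration):

static void example_spin_backoff(kmp_backoff_t *b) {
  kmp_uint32 i;
  for (i = 0; i < b->step; ++i) {
    /* delay for about min_tick machine ticks per iteration */
  }
  b->step <<= 1; // exponential growth of the delay window...
  if (b->step > b->max_backoff)
    b->step = b->max_backoff; // ...truncated at the configured upper bound
}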