diff options
author | Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> | 2019-05-21 19:35:02 +0000 |
---|---|---|
committer | Gheorghe-Teodor Bercea <gheorghe-teod.bercea@ibm.com> | 2019-05-21 19:35:02 +0000 |
commit | 9994ae3c1c21dd4bbc57bb8f491f0a3cbc198336 (patch) | |
tree | 6f5cb694097ccc5631d83d9fedffa9455ae53545 | |
parent | 665d5c5112ab6ee1cba56733ec3a1b37e86aa55b (diff) |
[OpenMP][libomptarget] Enable requires flags for target libraries.
Summary:
Target link variables are currently implemented by creating a copy of the variables on the device side and unified memory never gets exploited.
When the prgram uses the:
```
#pragma omp requires unified_shared_memory
```
directive in conjunction with a declare target link, the linked variable is no longer allocated on the device and the host version is used instead.
This behavior is overridden by performing an explicit mapping.
A Clang side patch is required.
Reviewers: ABataev, AlexEichenberger, grokos, Hahnfeld
Reviewed By: AlexEichenberger, grokos, Hahnfeld
Subscribers: Hahnfeld, jfb, guansong, jdoerfert, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D60223
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@361294 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | libomptarget/include/omptarget.h | 18 | ||||
-rw-r--r-- | libomptarget/src/device.cpp | 2 | ||||
-rw-r--r-- | libomptarget/src/device.h | 8 | ||||
-rw-r--r-- | libomptarget/src/exports | 1 | ||||
-rw-r--r-- | libomptarget/src/interface.cpp | 8 | ||||
-rw-r--r-- | libomptarget/src/rtl.cpp | 42 | ||||
-rw-r--r-- | libomptarget/src/rtl.h | 5 | ||||
-rw-r--r-- | libomptarget/test/offloading/requires.c | 46 |
8 files changed, 126 insertions, 4 deletions
diff --git a/libomptarget/include/omptarget.h b/libomptarget/include/omptarget.h index 512ddbf..ff6e85c 100644 --- a/libomptarget/include/omptarget.h +++ b/libomptarget/include/omptarget.h @@ -60,6 +60,21 @@ enum OpenMPOffloadingDeclareTargetFlags { OMP_DECLARE_TARGET_DTOR = 0x04 }; +enum OpenMPOffloadingRequiresDirFlags { + /// flag undefined. + OMP_REQ_UNDEFINED = 0x000, + /// no requires directive present. + OMP_REQ_NONE = 0x001, + /// reverse_offload clause. + OMP_REQ_REVERSE_OFFLOAD = 0x002, + /// unified_address clause. + OMP_REQ_UNIFIED_ADDRESS = 0x004, + /// unified_shared_memory clause. + OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, + /// dynamic_allocators clause. + OMP_REQ_DYNAMIC_ALLOCATORS = 0x010 +}; + /// This struct is a record of an entry point or global. For a function /// entry point the size is expected to be zero struct __tgt_offload_entry { @@ -113,6 +128,9 @@ int omp_target_associate_ptr(void *host_ptr, void *device_ptr, size_t size, size_t device_offset, int device_num); int omp_target_disassociate_ptr(void *host_ptr, int device_num); +/// add the clauses of the requires directives in a given file +void __tgt_register_requires(int64_t flags); + /// adds a target shared library to the target execution image void __tgt_register_lib(__tgt_bin_desc *desc); diff --git a/libomptarget/src/device.cpp b/libomptarget/src/device.cpp index ff99701..a946b92 100644 --- a/libomptarget/src/device.cpp +++ b/libomptarget/src/device.cpp @@ -152,7 +152,7 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) { // Used by target_data_begin // Return the target pointer begin (where the data will be moved). -// Allocate memory if this is the first occurrence if this mapping. +// Allocate memory if this is the first occurrence of this mapping. // Increment the reference counter. // If NULL is returned, then either data allocation failed or the user tried // to do an illegal mapping. diff --git a/libomptarget/src/device.h b/libomptarget/src/device.h index afd92ac..2e1ad71 100644 --- a/libomptarget/src/device.h +++ b/libomptarget/src/device.h @@ -98,11 +98,13 @@ struct DeviceTy { uint64_t loopTripCnt; + int64_t RTLRequiresFlags; + DeviceTy(RTLInfoTy *RTL) : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), - ShadowMtx(), loopTripCnt(0) {} + ShadowMtx(), loopTripCnt(0), RTLRequiresFlags(0) {} // The existence of mutexes makes DeviceTy non-copyable. We need to // provide a copy constructor and an assignment operator explicitly. @@ -112,7 +114,8 @@ struct DeviceTy { HostDataToTargetMap(d.HostDataToTargetMap), PendingCtorsDtors(d.PendingCtorsDtors), ShadowPtrMap(d.ShadowPtrMap), DataMapMtx(), PendingGlobalsMtx(), - ShadowMtx(), loopTripCnt(d.loopTripCnt) {} + ShadowMtx(), loopTripCnt(d.loopTripCnt), + RTLRequiresFlags(d.RTLRequiresFlags) {} DeviceTy& operator=(const DeviceTy &d) { DeviceID = d.DeviceID; @@ -124,6 +127,7 @@ struct DeviceTy { PendingCtorsDtors = d.PendingCtorsDtors; ShadowPtrMap = d.ShadowPtrMap; loopTripCnt = d.loopTripCnt; + RTLRequiresFlags = d.RTLRequiresFlags; return *this; } diff --git a/libomptarget/src/exports b/libomptarget/src/exports index 8114751..f13414e 100644 --- a/libomptarget/src/exports +++ b/libomptarget/src/exports @@ -1,5 +1,6 @@ VERS1.0 { global: + __tgt_register_requires; __tgt_register_lib; __tgt_unregister_lib; __tgt_target_data_begin; diff --git a/libomptarget/src/interface.cpp b/libomptarget/src/interface.cpp index 95a05a5..d055324 100644 --- a/libomptarget/src/interface.cpp +++ b/libomptarget/src/interface.cpp @@ -57,7 +57,7 @@ static void HandleTargetOutcome(bool success) { } break; case tgt_default: - FATAL_MESSAGE0(1, "default offloading policy must switched to " + FATAL_MESSAGE0(1, "default offloading policy must switched to " "mandatory or disabled"); break; case tgt_mandatory: @@ -69,6 +69,12 @@ static void HandleTargetOutcome(bool success) { } //////////////////////////////////////////////////////////////////////////////// +/// adds requires flags +EXTERN void __tgt_register_requires(int64_t flags) { + RTLs.RegisterRequires(flags); +} + +//////////////////////////////////////////////////////////////////////////////// /// adds a target shared library to the target execution image EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { RTLs.RegisterLib(desc); diff --git a/libomptarget/src/rtl.cpp b/libomptarget/src/rtl.cpp index d1d882f..770ae36 100644 --- a/libomptarget/src/rtl.cpp +++ b/libomptarget/src/rtl.cpp @@ -186,6 +186,46 @@ static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc, } } +void RTLsTy::RegisterRequires(int64_t flags) { + // TODO: add more elaborate check. + // Minimal check: only set requires flags if previous value + // is undefined. This ensures that only the first call to this + // function will set the requires flags. All subsequent calls + // will be checked for compatibility. + assert(flags != OMP_REQ_UNDEFINED && + "illegal undefined flag for requires directive!"); + if (RequiresFlags == OMP_REQ_UNDEFINED) { + RequiresFlags = flags; + return; + } + + // If multiple compilation units are present enforce + // consistency across all of them for require clauses: + // - reverse_offload + // - unified_address + // - unified_shared_memory + if ((RequiresFlags & OMP_REQ_REVERSE_OFFLOAD) != + (flags & OMP_REQ_REVERSE_OFFLOAD)) { + FATAL_MESSAGE0(1, + "'#pragma omp requires reverse_offload' not used consistently!"); + } + if ((RequiresFlags & OMP_REQ_UNIFIED_ADDRESS) != + (flags & OMP_REQ_UNIFIED_ADDRESS)) { + FATAL_MESSAGE0(1, + "'#pragma omp requires unified_address' not used consistently!"); + } + if ((RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) != + (flags & OMP_REQ_UNIFIED_SHARED_MEMORY)) { + FATAL_MESSAGE0(1, + "'#pragma omp requires unified_shared_memory' not used consistently!"); + } + + // TODO: insert any other missing checks + + DP("New requires flags %ld compatible with existing %ld!\n", + flags, RequiresFlags); +} + void RTLsTy::RegisterLib(__tgt_bin_desc *desc) { // Attempt to load all plugins available in the system. std::call_once(initFlag, &RTLsTy::LoadRTLs, this); @@ -222,6 +262,8 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) { Devices[start + device_id].DeviceID = start + device_id; // RTL local device ID Devices[start + device_id].RTLDeviceID = device_id; + // RTL requires flags + Devices[start + device_id].RTLRequiresFlags = RequiresFlags; } // Initialize the index of this RTL and save it in the used RTLs. diff --git a/libomptarget/src/rtl.h b/libomptarget/src/rtl.h index 5778eee..381f23e 100644 --- a/libomptarget/src/rtl.h +++ b/libomptarget/src/rtl.h @@ -118,8 +118,13 @@ public: // binaries. std::vector<RTLInfoTy *> UsedRTLs; + int64_t RequiresFlags; + explicit RTLsTy() {} + // Register the clauses of the requires directive. + void RegisterRequires(int64_t flags); + // Register a shared library with all (compatible) RTLs. void RegisterLib(__tgt_bin_desc *desc); diff --git a/libomptarget/test/offloading/requires.c b/libomptarget/test/offloading/requires.c new file mode 100644 index 0000000..7f014d3 --- /dev/null +++ b/libomptarget/test/offloading/requires.c @@ -0,0 +1,46 @@ +// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu -allow-empty -check-prefix=DEBUG +// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu -allow-empty -check-prefix=DEBUG +// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu -allow-empty -check-prefix=DEBUG +// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu -allow-empty -check-prefix=DEBUG +// REQUIRES: libomptarget-debug + +/* + Test for the 'requires' clause check. + When a target region is used, the requires flags are set in the + runtime for the entire compilation unit. If the flags are set again, + (for whatever reason) the set must be consistent with previously + set values. +*/ +#include <stdio.h> +#include <omp.h> + +// --------------------------------------------------------------------------- +// Various definitions copied from OpenMP RTL + +extern void __tgt_register_requires(int64_t); + +// End of definitions copied from OpenMP RTL. +// --------------------------------------------------------------------------- + +void run_reg_requires() { + // Before the target region is registered, the requires registers the status + // of the requires clauses. Since there are no requires clauses in this file + // the flags state can only be OMP_REQ_NONE i.e. 1. + + // This is the 2nd time this function is called so it should print the debug + // info belonging to the check. + __tgt_register_requires(1); + __tgt_register_requires(1); + // DEBUG: New requires flags 1 compatible with existing 1! +} + +// --------------------------------------------------------------------------- +int main() { + run_reg_requires(); + +// This also runs reg requires for the first time. +#pragma omp target + {} + + return 0; +}
\ No newline at end of file |