Diffstat (limited to 'final/libomptarget/deviceRTLs/nvptx/src/supporti.h')
-rw-r--r--  final/libomptarget/deviceRTLs/nvptx/src/supporti.h  277
1 file changed, 277 insertions, 0 deletions
diff --git a/final/libomptarget/deviceRTLs/nvptx/src/supporti.h b/final/libomptarget/deviceRTLs/nvptx/src/supporti.h
new file mode 100644
index 0000000..b8f661c
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/src/supporti.h
@@ -0,0 +1,277 @@
+//===--------- supporti.h - NVPTX OpenMP support functions ------- CUDA -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Wrapper implementation to some functions natively supported by the GPU.
+//
+//===----------------------------------------------------------------------===//
+
+////////////////////////////////////////////////////////////////////////////////
+// Execution Parameters
+////////////////////////////////////////////////////////////////////////////////
+
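+// execution_param packs the execution mode (Generic/SPMD) and the runtime
+// mode (initialized/uninitialized) into a single flag word; the mask-based
+// predicates below read it back.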
+INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
+ execution_param = EMode;
+ execution_param |= RMode;
+}
+
+INLINE bool isGenericMode() { return (execution_param & ModeMask) == Generic; }
+
+INLINE bool isSPMDMode() { return (execution_param & ModeMask) == Spmd; }
+
+INLINE bool isRuntimeUninitialized() {
+ return (execution_param & RuntimeMask) == RuntimeUninitialized;
+}
+
+INLINE bool isRuntimeInitialized() {
+ return (execution_param & RuntimeMask) == RuntimeInitialized;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Execution Modes based on location parameter fields
+////////////////////////////////////////////////////////////////////////////////
+
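+// These check* variants consult the ident flags in loc->reserved_2 when a
+// location is available and fall back to the execution_param state when it
+// is not, or when the flags form the UNDEFINED combination.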
+INLINE bool checkSPMDMode(kmp_Ident *loc) {
+ if (!loc)
+ return isSPMDMode();
+
+ // If SPMD is true then we are not in the UNDEFINED state so
+ // we can return immediately.
+ if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
+ return true;
+
+  // Not in SPMD mode and the full runtime is required: this is a
+  // well-defined combination of flags, so we can return immediately.
+ if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
+ return false;
+
+  // We are in the UNDEFINED state.
+ return isSPMDMode();
+}
+
+INLINE bool checkGenericMode(kmp_Ident *loc) {
+ return !checkSPMDMode(loc);
+}
+
+INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) {
+ if (!loc)
+ return isRuntimeUninitialized();
+
+  // If the full runtime is required then we cannot be in the
+  // UNDEFINED state, so we can return immediately.
+ if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
+ return false;
+
+  // The full runtime is not required, so we need to check whether we
+  // are in SPMD mode or not. If not in SPMD mode then we end up in the
+  // UNDEFINED state that marks orphaned functions.
+ if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
+ return true;
+
+  // Check if we are in the UNDEFINED state. Undefined is denoted by
+  // non-SPMD + no-runtime-required, a combination that cannot actually
+  // happen in practice; the UNDEFINED state is used to mark orphaned
+  // functions.
+ return isRuntimeUninitialized();
+}
+
+INLINE bool checkRuntimeInitialized(kmp_Ident *loc) {
+ return !checkRuntimeUninitialized(loc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// support: get info from machine
+////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// Calls to the NVPTX layer (assuming 1D layout)
+//
+////////////////////////////////////////////////////////////////////////////////
+
+INLINE int GetThreadIdInBlock() { return threadIdx.x; }
+
+INLINE int GetBlockIdInKernel() { return blockIdx.x; }
+
+INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; }
+
+INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// Calls to the Generic Scheme Implementation Layer (assuming 1D layout)
+//
+////////////////////////////////////////////////////////////////////////////////
+
+// The master thread id is the first thread (lane) of the last warp.
+// Thread id is 0 indexed.
+// E.g: If NumThreads is 33, master id is 32.
+// If NumThreads is 64, master id is 32.
+// If NumThreads is 97, master id is 96.
+// If NumThreads is 1024, master id is 992.
+//
+// Called in Generic Execution Mode only.
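+// The expression below rounds (blockDim.x - 1) down to a multiple of
+// WARPSIZE, i.e. it yields the thread id of the first lane of the last warp.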
+INLINE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); }
+
+// The last warp is reserved for the master; other warps are workers.
+// Called in Generic Execution Mode only.
+INLINE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
+
+////////////////////////////////////////////////////////////////////////////////
+// get thread id in team
+
+// This function may be called in a parallel region by the workers
+// or a serial region by the master. If the master (whose CUDA thread
+// id is GetMasterThreadID()) calls this routine, we return 0 because
+// it is a shadow for the first worker.
+INLINE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
+  // Implemented using control flow (predication) instead of a modulo
+  // operation.
+ int tid = GetThreadIdInBlock();
+ if (!isSPMDExecutionMode && tid >= GetMasterThreadID())
+ return 0;
+ else
+ return tid;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// OpenMP Thread Support Layer
+//
+////////////////////////////////////////////////////////////////////////////////
+
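+// Compute the OpenMP thread number of the calling thread. With an
+// uninitialized (simplified) runtime the hardware thread id in the block is
+// used directly; otherwise the id is read from the thread's task descriptor.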
+INLINE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode,
+ bool isRuntimeUninitialized) {
+ // omp_thread_num
+ int rc;
+
+ if (isRuntimeUninitialized) {
+ ASSERT0(LT_FUSSY, isSPMDExecutionMode,
+ "Uninitialized runtime with non-SPMD mode.");
+    // Parallel regions at nesting level 2 and deeper are executed sequentially.
+ if (parallelLevel > 0)
+ rc = 0;
+ else
+ rc = GetThreadIdInBlock();
+ } else {
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId);
+ rc = currTaskDescr->ThreadId();
+ }
+ return rc;
+}
+
+INLINE int GetNumberOfOmpThreads(int threadId, bool isSPMDExecutionMode,
+ bool isRuntimeUninitialized) {
+ // omp_num_threads
+ int rc;
+
+ if (isRuntimeUninitialized) {
+ ASSERT0(LT_FUSSY, isSPMDExecutionMode,
+ "Uninitialized runtime with non-SPMD mode.");
+    // Parallel regions at nesting level 2 and deeper are executed sequentially.
+ if (parallelLevel > 0)
+ rc = 1;
+ else
+ rc = GetNumberOfThreadsInBlock();
+ } else {
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId);
+ ASSERT0(LT_FUSSY, currTaskDescr, "expected a top task descr");
+ rc = currTaskDescr->ThreadsInTeam();
+ }
+
+ return rc;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Team id linked to OpenMP
+
+INLINE int GetOmpTeamId() {
+ // omp_team_num
+ return GetBlockIdInKernel(); // assume 1 block per team
+}
+
+INLINE int GetNumberOfOmpTeams() {
+ // omp_num_teams
+ return GetNumberOfBlocksInKernel(); // assume 1 block per team
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Masters
+
+INLINE int IsTeamMaster(int ompThreadId) { return (ompThreadId == 0); }
+
+////////////////////////////////////////////////////////////////////////////////
+// get OpenMP number of procs
+
+// Get the number of processors in the device.
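+// In Generic mode the master warp is excluded, so this is the number of
+// worker threads; in SPMD mode it is every thread in the block.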
+INLINE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) {
+ if (!isSPMDExecutionMode)
+ return GetNumberOfWorkersInTeam();
+ return GetNumberOfThreadsInBlock();
+}
+
+INLINE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
+ return GetNumberOfProcsInDevice(isSPMDExecutionMode);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Memory
+////////////////////////////////////////////////////////////////////////////////
+
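+// Return the number of bytes needed to round `size` up to the next multiple
+// of `alignment` (a power of 2), i.e. (-size) mod alignment. For example,
+// PadBytes(13, 8) == 3, so that 13 + 3 == 16.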
+INLINE unsigned long PadBytes(unsigned long size,
+ unsigned long alignment) // must be a power of 2
+{
+ // compute the necessary padding to satisfy alignment constraint
+ ASSERT(LT_FUSSY, (alignment & (alignment - 1)) == 0,
+ "alignment %lu is not a power of 2\n", alignment);
+ return (~(unsigned long)size + 1) & (alignment - 1);
+}
+
+INLINE void *SafeMalloc(size_t size, const char *msg) // check if success
+{
+ void *ptr = malloc(size);
+ PRINT(LD_MEM, "malloc data of size %llu for %s: 0x%llx\n",
+ (unsigned long long)size, msg, (unsigned long long)ptr);
+ return ptr;
+}
+
+INLINE void *SafeFree(void *ptr, const char *msg) {
+ PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg);
+ free(ptr);
+ return NULL;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Named Barrier Routines
+////////////////////////////////////////////////////////////////////////////////
+
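+// Synchronize num_threads threads on the named barrier `barrier` via PTX
+// bar.sync; the PTX ISA expects the thread count passed to bar.sync to be
+// a multiple of the warp size.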
+INLINE void named_sync(const int barrier, const int num_threads) {
+ asm volatile("bar.sync %0, %1;"
+ :
+ : "r"(barrier), "r"(num_threads)
+ : "memory");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Teams Reduction Scratchpad Helpers
+////////////////////////////////////////////////////////////////////////////////
+
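+// Scratchpad layout (as used below): the timestamp counters live in the
+// first 256 bytes, and the reduction data area starts at offset 256.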
+INLINE unsigned int *GetTeamsReductionTimestamp() {
+ return static_cast<unsigned int *>(ReductionScratchpadPtr);
+}
+
+INLINE char *GetTeamsReductionScratchpad() {
+ return static_cast<char *>(ReductionScratchpadPtr) + 256;
+}
+
+INLINE void SetTeamsReductionScratchpadPtr(void *ScratchpadPtr) {
+ ReductionScratchpadPtr = ScratchpadPtr;
+}