Diffstat (limited to 'rc3/libomptarget/deviceRTLs/nvptx/src/sync.cu')
-rw-r--r--  rc3/libomptarget/deviceRTLs/nvptx/src/sync.cu | 146
1 file changed, 146 insertions(+), 0 deletions(-)
diff --git a/rc3/libomptarget/deviceRTLs/nvptx/src/sync.cu b/rc3/libomptarget/deviceRTLs/nvptx/src/sync.cu
new file mode 100644
index 0000000..c89dee2
--- /dev/null
+++ b/rc3/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -0,0 +1,146 @@
+//===------------ sync.cu - NVPTX OpenMP synchronizations -------- CUDA -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.txt for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Include all synchronization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "omptarget-nvptx.h"
+
+////////////////////////////////////////////////////////////////////////////////
+// KMP Ordered calls
+////////////////////////////////////////////////////////////////////////////////
+
+EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t tid) {
+ PRINT0(LD_IO, "call kmpc_ordered\n");
+}
+
+EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t tid) {
+ PRINT0(LD_IO, "call kmpc_end_ordered\n");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// KMP Barriers
+////////////////////////////////////////////////////////////////////////////////
+
+// A team maps to a CUDA block, so we can use CUDA's native synchronization.
+// FIXME: what if not all threads (warps) participate in the barrier?
+// We may need to implement it differently.
+
+EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) {
+ PRINT0(LD_IO, "call kmpc_cancel_barrier\n");
+ __kmpc_barrier(loc_ref, tid);
+ PRINT0(LD_SYNC, "completed kmpc_cancel_barrier\n");
+ return 0;
+}
+
+EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
+ if (checkRuntimeUninitialized(loc_ref)) {
+ ASSERT0(LT_FUSSY, checkSPMDMode(loc_ref),
+ "Expected SPMD mode with uninitialized runtime.");
+ __kmpc_barrier_simple_spmd(loc_ref, tid);
+ } else {
+ tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc_ref));
+ omptarget_nvptx_TaskDescr *currTaskDescr =
+ omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(tid);
+ int numberOfActiveOMPThreads = GetNumberOfOmpThreads(
+ tid, checkSPMDMode(loc_ref), /*isRuntimeUninitialized=*/false);
+ if (numberOfActiveOMPThreads > 1) {
+ if (checkSPMDMode(loc_ref)) {
+ __kmpc_barrier_simple_spmd(loc_ref, tid);
+ } else {
+ // The #threads parameter must be rounded up to a multiple of WARPSIZE.
+ int threads =
+ WARPSIZE * ((numberOfActiveOMPThreads + WARPSIZE - 1) / WARPSIZE);
+
+ PRINT(LD_SYNC,
+ "call kmpc_barrier with %d omp threads, sync parameter %d\n",
+ (int)numberOfActiveOMPThreads, (int)threads);
+ // Barrier #1 is for synchronization among active threads.
+ named_sync(L1_BARRIER, threads);
+ }
+ } // numberOfActiveOMPThreads > 1
+ PRINT0(LD_SYNC, "completed kmpc_barrier\n");
+ }
+}
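
A note on the rounding above: named_sync maps to the PTX bar.sync instruction, which counts threads in whole warps, so the barrier width is the smallest multiple of WARPSIZE that covers the active OpenMP threads. Below is a compile-and-run sketch of just that arithmetic; the warp size constant 32 and the helper name roundToWarps are illustrative assumptions, not part of this patch.

// Illustrative sketch only: the round-up-to-warp arithmetic used by
// __kmpc_barrier, checked on the host. Assumes a warp size of 32.
#include <cassert>
#include <cstdio>

static int roundToWarps(int numThreads, int warpSize = 32) {
  // Smallest multiple of warpSize that is >= numThreads.
  return warpSize * ((numThreads + warpSize - 1) / warpSize);
}

int main() {
  assert(roundToWarps(1) == 32);   // one thread still syncs a full warp
  assert(roundToWarps(32) == 32);  // exact multiples are unchanged
  assert(roundToWarps(33) == 64);  // one extra thread pulls in another warp
  printf("round-up checks passed\n");
  return 0;
}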
+
+// Emit a simple barrier call in SPMD mode. Assumes the caller is in an L0
+// parallel region and that all worker threads participate.
+EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
+ PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
+ // FIXME: use __syncthreads instead when the function copy is fixed in LLVM.
+ __SYNCTHREADS();
+ PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
+}
+
+// Emit a simple barrier call in Generic mode. Assumes the caller is in an L0
+// parallel region and that all worker threads participate.
+EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) {
+ int numberOfActiveOMPThreads = GetNumberOfThreadsInBlock() - WARPSIZE;
+ // The #threads parameter must be rounded up to a multiple of WARPSIZE.
+ int threads =
+ WARPSIZE * ((numberOfActiveOMPThreads + WARPSIZE - 1) / WARPSIZE);
+
+ PRINT(LD_SYNC,
+ "call kmpc_barrier_simple_generic with %d omp threads, sync parameter "
+ "%d\n",
+ (int)numberOfActiveOMPThreads, (int)threads);
+ // Barrier #1 is for synchronization among active threads.
+ named_sync(L1_BARRIER, threads);
+ PRINT0(LD_SYNC, "completed kmpc_barrier_simple_generic\n");
+}
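
For context on the WARPSIZE subtraction above: in Generic mode the last warp of the block is reserved as the master warp, so only the remaining warps are OpenMP workers. Here is a host-side sketch of that bookkeeping; the block size of 128 and the plain constants standing in for GetNumberOfThreadsInBlock() are illustrative assumptions.

// Illustrative sketch only: how the generic-mode barrier derives the number
// of threads it synchronizes. All values are assumed, not queried from a GPU.
#include <cstdio>

int main() {
  const int warpSize = 32;       // WARPSIZE in the device RTL
  const int blockThreads = 128;  // stands in for GetNumberOfThreadsInBlock()
  int workers = blockThreads - warpSize;  // last warp is the master warp: 96
  int syncCount = warpSize * ((workers + warpSize - 1) / warpSize);  // 96
  printf("workers=%d, bar.sync thread count=%d\n", workers, syncCount);
  return 0;
}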
+
+////////////////////////////////////////////////////////////////////////////////
+// KMP MASTER
+////////////////////////////////////////////////////////////////////////////////
+
+EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid) {
+ PRINT0(LD_IO, "call kmpc_master\n");
+ return IsTeamMaster(global_tid);
+}
+
+EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid) {
+ PRINT0(LD_IO, "call kmpc_end_master\n");
+ ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// KMP SINGLE
+////////////////////////////////////////////////////////////////////////////////
+
+EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid) {
+ PRINT0(LD_IO, "call kmpc_single\n");
+ // Single is implemented with master: the master thread runs the region.
+ return IsTeamMaster(global_tid);
+}
+
+EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
+ PRINT0(LD_IO, "call kmpc_end_single\n");
+ // Single is implemented with master: the master thread runs the region.
+ ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
+ // The synchronization barrier after the single region is emitted
+ // explicitly by the compiler, so no barrier is needed here.
+}
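
For reference, a single construct reaches these entry points through compiler-generated code of roughly the form if (__kmpc_single(...)) { body; __kmpc_end_single(...); } followed by the explicit barrier mentioned in the comment above (unless nowait is given). The program below is a minimal usage example that exercises that path when built with NVPTX offloading and simply runs on the host otherwise.

// Illustrative usage only: the single region runs in exactly one thread; the
// other threads wait at the barrier emitted after the construct.
#include <cstdio>

int main() {
  int flag = 0;
#pragma omp target parallel map(tofrom : flag)
  {
#pragma omp single
    { flag = 1; }  // executed by one thread only
  }
  printf("flag=%d\n", flag);  // expected: flag=1
  return 0;
}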
+
+////////////////////////////////////////////////////////////////////////////////
+// Flush
+////////////////////////////////////////////////////////////////////////////////
+
+EXTERN void __kmpc_flush(kmp_Ident *loc) {
+ PRINT0(LD_IO, "call kmpc_flush\n");
+ __threadfence_system();
+}
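
__threadfence_system() orders this thread's prior writes so that they become visible to every thread on the device and to the host, which conservatively covers the OpenMP flush semantics. Below is a self-contained CUDA sketch of the publish/observe pattern such a fence enables; the kernel and variable names are invented for illustration.

// Illustrative sketch only: one thread publishes a value behind a fence and a
// thread in another warp spins until it observes the flag.
#include <cstdio>

__device__ volatile int g_data = 0;
__device__ volatile int g_flag = 0;

__global__ void publishAndObserve() {
  if (threadIdx.x == 0) {
    g_data = 42;             // write the payload first
    __threadfence_system();  // make it visible device- and system-wide
    g_flag = 1;              // then publish the flag
  } else if (threadIdx.x == 32) {  // a lane in a different warp
    while (g_flag == 0)      // spin until the flag becomes visible
      ;
    __threadfence();         // order the flag read before the data read
    printf("observer read data=%d\n", g_data);
  }
}

int main() {
  publishAndObserve<<<1, 64>>>();
  cudaDeviceSynchronize();
  return 0;
}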
+
+////////////////////////////////////////////////////////////////////////////////
+// Vote
+////////////////////////////////////////////////////////////////////////////////
+
+EXTERN int32_t __kmpc_warp_active_thread_mask() {
+ PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");
+ return __ACTIVEMASK();
+}
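
In this RTL, __ACTIVEMASK() expands to CUDA's __activemask() (or a full-warp __ballot on pre-CUDA-9 toolkits), which returns one bit per lane of the warp that is executing the call convergently. The CUDA sketch below shows the mask narrowing under divergence; the kernel name is invented for illustration.

// Illustrative sketch only: the active mask is full while the warp is
// converged and typically shrinks inside a divergent branch.
#include <cstdio>

__global__ void showActiveMask() {
  unsigned converged = __activemask();  // all 32 lanes active: 0xffffffff
  if (threadIdx.x < 16) {
    // Only the lower half of the warp takes this branch, so the mask here
    // typically reports just those 16 lanes (e.g. 0x0000ffff).
    unsigned divergent = __activemask();
    if (threadIdx.x == 0)
      printf("converged=0x%08x divergent=0x%08x\n", converged, divergent);
  }
}

int main() {
  showActiveMask<<<1, 32>>>();
  cudaDeviceSynchronize();
  return 0;
}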