aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2019-06-27 18:33:09 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2019-06-27 18:33:09 +0000
commita31ae2e44a8f91ca20aa03319603829d994a1635 (patch)
tree0e6fc882cb8358b6a15396206b3e415e679aed68
parent0afc673b1708437b5d9ff8ce0d50b41ab3b776b8 (diff)
[OPENMP][NVPTX]Relax flush directive.
Summary: According to the OpenMP standard, flush makes a thread’s temporary view of memory consistent with memory and enforces an order on the memory operations of the variables explicitly specified or implied. According to the Cuda toolkit documentation (https://docs.nvidia.com/cuda/archive/8.0/cuda-c-programming-guide/index.html#memory-fence-functions), __threadfence() functions provides required functionality. __threadfence_system() also provides required functionality, but it also includes some extra functionality, like synchronization of page-locked host memory, synchronization for the host, etc. It is not required per the standard and we can use more relaxed version of memory fence operation. Reviewers: grokos, gtbercea, kkwli0 Subscribers: guansong, jfb, jdoerfert, openmp-commits, caomhin Tags: #openmp Differential Revision: https://reviews.llvm.org/D62397 git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@364572 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/sync.cu2
-rw-r--r--libomptarget/deviceRTLs/nvptx/test/parallel/flush.c35
2 files changed, 36 insertions, 1 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/sync.cu b/libomptarget/deviceRTLs/nvptx/src/sync.cu
index d81aa8f..688420e 100644
--- a/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -130,7 +130,7 @@ EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
EXTERN void __kmpc_flush(kmp_Ident *loc) {
PRINT0(LD_IO, "call kmpc_flush\n");
- __threadfence_system();
+ __threadfence();
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/libomptarget/deviceRTLs/nvptx/test/parallel/flush.c b/libomptarget/deviceRTLs/nvptx/test/parallel/flush.c
new file mode 100644
index 0000000..412538b
--- /dev/null
+++ b/libomptarget/deviceRTLs/nvptx/test/parallel/flush.c
@@ -0,0 +1,35 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+int main(int argc, char *argv[]) {
+ int data, out, flag = 0;
+#pragma omp target parallel num_threads(64) map(tofrom \
+ : out, flag) map(to \
+ : data)
+ {
+ if (omp_get_thread_num() == 0) {
+ /* Write to the data buffer that will be read by thread */
+ data = 42;
+/* Flush data to thread 32 */
+#pragma omp flush(data)
+ /* Set flag to release thread 32 */
+#pragma omp atomic write
+ flag = 1;
+ } else if (omp_get_thread_num() == 32) {
+ /* Loop until we see the update to the flag */
+ int val;
+ do {
+#pragma omp atomic read
+ val = flag;
+ } while (val < 1);
+ out = data;
+#pragma omp flush(out)
+ }
+ }
+ // CHECK: out=42.
+ /* Value of out will be 42 */
+ printf("out=%d.\n", out);
+ return !(out == 42);
+}