aboutsummaryrefslogtreecommitdiff
path: root/final/libomptarget/deviceRTLs/nvptx/test
diff options
context:
space:
mode:
Diffstat (limited to 'final/libomptarget/deviceRTLs/nvptx/test')
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/CMakeLists.txt26
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/api/ignored.c38
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/api/max_threads.c46
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c72
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c55
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/lit.cfg69
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/lit.site.cfg.in14
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/parallel/flush.c35
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/parallel/level.c139
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/parallel/nested.c136
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/parallel/num_threads.c102
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp51
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/parallel/thread_limit.c77
-rw-r--r--final/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c22
14 files changed, 882 insertions, 0 deletions
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/CMakeLists.txt b/final/libomptarget/deviceRTLs/nvptx/test/CMakeLists.txt
new file mode 100644
index 0000000..33945d1
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/CMakeLists.txt
@@ -0,0 +1,26 @@
+if(NOT OPENMP_TEST_COMPILER_ID STREQUAL "Clang")
+ # Silently return, no need to annoy the user.
+ return()
+endif()
+
+set(deps omptarget-nvptx omptarget omp)
+if(LIBOMPTARGET_NVPTX_ENABLE_BCLIB)
+ set(deps ${deps} omptarget-nvptx-bc)
+endif()
+
+# Don't run by default.
+set(EXCLUDE_FROM_ALL True)
+# Run with only one thread to only launch one application to the GPU at a time.
+add_openmp_testsuite(check-libomptarget-nvptx
+ "Running libomptarget-nvptx tests" ${CMAKE_CURRENT_BINARY_DIR}
+ DEPENDS ${deps} ARGS -j1)
+
+set(LIBOMPTARGET_NVPTX_TEST_FLAGS "" CACHE STRING
+ "Extra compiler flags to send to the test compiler.")
+set(LIBOMPTARGET_NVPTX_TEST_OPENMP_FLAGS
+ "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda" CACHE STRING
+ "OpenMP compiler flags to use for testing libomptarget-nvptx.")
+
+# Configure the lit.site.cfg.in file
+set(AUTO_GEN_COMMENT "## Autogenerated by libomptarget-nvptx configuration.\n# Do not edit!")
+configure_file(lit.site.cfg.in lit.site.cfg @ONLY)
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/api/ignored.c b/final/libomptarget/deviceRTLs/nvptx/test/api/ignored.c
new file mode 100644
index 0000000..1fa9ae0
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/api/ignored.c
@@ -0,0 +1,38 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+const int MaxThreads = 1024;
+
+int main(int argc, char *argv[]) {
+ int cancellation = -1, dynamic = -1, nested = -1, maxActiveLevels = -1;
+
+ #pragma omp target map(cancellation, dynamic, nested, maxActiveLevels)
+ {
+ // libomptarget-nvptx doesn't support cancellation.
+ cancellation = omp_get_cancellation();
+
+ // No support for dynamic adjustment of the number of threads.
+ omp_set_dynamic(1);
+ dynamic = omp_get_dynamic();
+
+ // libomptarget-nvptx doesn't support nested parallelism.
+ omp_set_nested(1);
+ nested = omp_get_nested();
+
+ omp_set_max_active_levels(42);
+ maxActiveLevels = omp_get_max_active_levels();
+ }
+
+ // CHECK: cancellation = 0
+ printf("cancellation = %d\n", cancellation);
+ // CHECK: dynamic = 0
+ printf("dynamic = %d\n", dynamic);
+ // CHECK: nested = 0
+ printf("nested = %d\n", nested);
+ // CHECK: maxActiveLevels = 1
+ printf("maxActiveLevels = %d\n", maxActiveLevels);
+
+ return 0;
+}
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/api/max_threads.c b/final/libomptarget/deviceRTLs/nvptx/test/api/max_threads.c
new file mode 100644
index 0000000..d0d9f31
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/api/max_threads.c
@@ -0,0 +1,46 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+int main(int argc, char *argv[]) {
+ int MaxThreadsL1 = -1, MaxThreadsL2 = -1;
+
+#pragma omp declare reduction(unique:int \
+ : omp_out = (omp_in == 1 ? omp_in : omp_out)) \
+ initializer(omp_priv = -1)
+
+ // Non-SPMD mode.
+#pragma omp target teams map(MaxThreadsL1, MaxThreadsL2) thread_limit(32) \
+ num_teams(1)
+ {
+ MaxThreadsL1 = omp_get_max_threads();
+#pragma omp parallel reduction(unique : MaxThreadsL2)
+ { MaxThreadsL2 = omp_get_max_threads(); }
+ }
+
+ // CHECK: Non-SPMD MaxThreadsL1 = 32
+ printf("Non-SPMD MaxThreadsL1 = %d\n", MaxThreadsL1);
+ // CHECK: Non-SPMD MaxThreadsL2 = 1
+ printf("Non-SPMD MaxThreadsL2 = %d\n", MaxThreadsL2);
+
+ // SPMD mode with full runtime
+ MaxThreadsL2 = -1;
+#pragma omp target parallel reduction(unique : MaxThreadsL2)
+ { MaxThreadsL2 = omp_get_max_threads(); }
+
+ // CHECK: SPMD with full runtime MaxThreadsL2 = 1
+ printf("SPMD with full runtime MaxThreadsL2 = %d\n", MaxThreadsL2);
+
+ // SPMD mode without runtime
+ MaxThreadsL2 = -1;
+#pragma omp target parallel for reduction(unique : MaxThreadsL2)
+ for (int I = 0; I < 2; ++I) {
+ MaxThreadsL2 = omp_get_max_threads();
+ }
+
+ // CHECK: SPMD without runtime MaxThreadsL2 = 1
+ printf("SPMD without runtime MaxThreadsL2 = %d\n", MaxThreadsL2);
+
+ return 0;
+}
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c b/final/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c
new file mode 100644
index 0000000..626d620
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c
@@ -0,0 +1,72 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+int main(int argc, char *argv[]) {
+ int ThreadLimitL0 = -1, ThreadLimitL1 = -1, ThreadLimitL2 = -1;
+
+#pragma omp declare reduction(unique64:int \
+ : omp_out = (omp_in == 64 ? omp_in : omp_out)) \
+ initializer(omp_priv = -1)
+#pragma omp declare reduction(unique32:int \
+ : omp_out = (omp_in == 32 ? omp_in : omp_out)) \
+ initializer(omp_priv = -1)
+
+ // Non-SPMD mode.
+#pragma omp target teams map(ThreadLimitL0, ThreadLimitL1, ThreadLimitL2) \
+ thread_limit(64) num_teams(1)
+ {
+ ThreadLimitL0 = omp_get_thread_limit();
+#pragma omp parallel reduction(unique64 \
+ : ThreadLimitL1, ThreadLimitL2) num_threads(32)
+ {
+ ThreadLimitL1 = omp_get_thread_limit();
+#pragma omp parallel reduction(unique64 : ThreadLimitL2)
+ { ThreadLimitL2 = omp_get_thread_limit(); }
+ }
+ }
+
+ // CHECK: Non-SPMD ThreadLimitL0 = 64
+ printf("Non-SPMD ThreadLimitL0 = %d\n", ThreadLimitL0);
+ // CHECK: Non-SPMD ThreadLimitL1 = 64
+ printf("Non-SPMD ThreadLimitL1 = %d\n", ThreadLimitL1);
+ // CHECK: Non-SPMD ThreadLimitL2 = 64
+ printf("Non-SPMD ThreadLimitL2 = %d\n", ThreadLimitL2);
+
+ // SPMD mode with full runtime
+ ThreadLimitL1 = -1;
+ ThreadLimitL2 = -1;
+#pragma omp target parallel reduction(unique32 \
+ : ThreadLimitL1, ThreadLimitL2) \
+ num_threads(32)
+ {
+ ThreadLimitL1 = omp_get_thread_limit();
+#pragma omp parallel reduction(unique32 : ThreadLimitL2)
+ { ThreadLimitL2 = omp_get_thread_limit(); }
+ }
+
+ // CHECK: SPMD with full runtime ThreadLimitL1 = 32
+ printf("SPMD with full runtime ThreadLimitL1 = %d\n", ThreadLimitL1);
+ // CHECK: SPMD with full runtime ThreadLimitL2 = 32
+ printf("SPMD with full runtime ThreadLimitL2 = %d\n", ThreadLimitL2);
+
+ // SPMD mode without runtime
+ ThreadLimitL1 = -1;
+ ThreadLimitL2 = -1;
+#pragma omp target parallel for reduction(unique32 \
+ : ThreadLimitL1, ThreadLimitL2) \
+ num_threads(32)
+ for (int I = 0; I < 2; ++I) {
+ ThreadLimitL1 = omp_get_thread_limit();
+#pragma omp parallel reduction(unique32 : ThreadLimitL2)
+ { ThreadLimitL2 = omp_get_thread_limit(); }
+ }
+
+ // CHECK: SPMD without runtime ThreadLimitL1 = 32
+ printf("SPMD without runtime ThreadLimitL1 = %d\n", ThreadLimitL1);
+ // CHECK: SPMD without runtime ThreadLimitL2 = 32
+ printf("SPMD without runtime ThreadLimitL2 = %d\n", ThreadLimitL2);
+
+ return 0;
+}
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c b/final/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c
new file mode 100644
index 0000000..dd17ae7
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c
@@ -0,0 +1,55 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+#pragma omp declare target
+static void putValueInParallel(int *ptr, int value) {
+ #pragma omp parallel
+ {
+ *ptr = value;
+ }
+}
+
+static int getId() {
+ int id;
+ putValueInParallel(&id, omp_get_thread_num());
+ return id;
+}
+#pragma omp end declare target
+
+const int MaxThreads = 1024;
+const int Threads = 64;
+
+int main(int argc, char *argv[]) {
+ int master;
+ int check[MaxThreads];
+ for (int i = 0; i < MaxThreads; i++) {
+ check[i] = 0;
+ }
+
+ #pragma omp target map(master, check[:])
+ {
+ master = getId();
+
+ #pragma omp parallel num_threads(Threads)
+ {
+ check[omp_get_thread_num()] = getId();
+ }
+ }
+
+ // CHECK: master = 0.
+ printf("master = %d.\n", master);
+ // CHECK-NOT: invalid
+ for (int i = 0; i < MaxThreads; i++) {
+ if (i < Threads) {
+ if (check[i] != i) {
+ printf("invalid: check[%d] should be %d, is %d\n", i, i, check[i]);
+ }
+ } else if (check[i] != 0) {
+ printf("invalid: check[%d] should be 0, is %d\n", i, check[i]);
+ }
+ }
+
+ return 0;
+}
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/lit.cfg b/final/libomptarget/deviceRTLs/nvptx/test/lit.cfg
new file mode 100644
index 0000000..0774c25
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/lit.cfg
@@ -0,0 +1,69 @@
+# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79:
+# Configuration file for the 'lit' test runner.
+
+import os
+import lit.formats
+
+# Tell pylint that we know config and lit_config exist somewhere.
+if 'PYLINT_IMPORT' in os.environ:
+ config = object()
+ lit_config = object()
+
+def prepend_library_path(name, value, sep):
+ if name in config.environment:
+ config.environment[name] = value + sep + config.environment[name]
+ else:
+ config.environment[name] = value
+
+# name: The name of this test suite.
+config.name = 'libomptarget-nvptx'
+
+# suffixes: A list of file extensions to treat as test files.
+config.suffixes = ['.c', '.cpp', '.cc']
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__)
+
+# test_exec_root: The root object directory where output is placed
+config.test_exec_root = config.binary_dir
+
+# test format
+config.test_format = lit.formats.ShTest()
+
+# compiler flags
+config.test_flags = " -I " + config.omp_header_directory + \
+ " -L " + config.library_dir + \
+ " --libomptarget-nvptx-path=" + config.library_dir;
+
+if config.omp_host_rtl_directory:
+ config.test_flags = config.test_flags + \
+ " -L " + config.omp_host_rtl_directory
+
+config.test_flags = config.test_flags + " " + config.test_extra_flags
+
+# Setup environment to find dynamic library at runtime.
+prepend_library_path('LD_LIBRARY_PATH', config.library_dir, ":")
+prepend_library_path('LD_LIBRARY_PATH', config.omp_host_rtl_directory, ":")
+
+# Forbid fallback to host.
+config.environment["OMP_TARGET_OFFLOAD"] = "MANDATORY"
+
+# substitutions
+config.substitutions.append(("%compilexx-run-and-check",
+ "%compilexx-and-run | " + config.libomptarget_filecheck + " %s"))
+config.substitutions.append(("%compile-run-and-check",
+ "%compile-and-run | " + config.libomptarget_filecheck + " %s"))
+config.substitutions.append(("%compilexx-and-run", "%compilexx && %run"))
+config.substitutions.append(("%compile-and-run", "%compile && %run"))
+
+config.substitutions.append(("%compilexx",
+ "%clangxx %openmp_flags %flags %s -o %t"))
+config.substitutions.append(("%compile",
+ "%clang %openmp_flags %flags %s -o %t"))
+
+config.substitutions.append(("%clangxx", config.test_cxx_compiler))
+config.substitutions.append(("%clang", config.test_c_compiler))
+config.substitutions.append(("%openmp_flags", config.test_openmp_flags))
+config.substitutions.append(("%flags", config.test_flags))
+
+config.substitutions.append(("%run", "%t"))
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/lit.site.cfg.in b/final/libomptarget/deviceRTLs/nvptx/test/lit.site.cfg.in
new file mode 100644
index 0000000..d9c14cb
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/lit.site.cfg.in
@@ -0,0 +1,14 @@
+@AUTO_GEN_COMMENT@
+
+config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@"
+config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@"
+config.test_openmp_flags = "@LIBOMPTARGET_NVPTX_TEST_OPENMP_FLAGS@"
+config.test_extra_flags = "@LIBOMPTARGET_NVPTX_TEST_FLAGS@"
+config.binary_dir = "@CMAKE_CURRENT_BINARY_DIR@"
+config.library_dir = "@LIBOMPTARGET_LIBRARY_DIR@"
+config.omp_header_directory = "@LIBOMPTARGET_OPENMP_HEADER_FOLDER@"
+config.omp_host_rtl_directory = "@LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER@"
+config.libomptarget_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@"
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/flush.c b/final/libomptarget/deviceRTLs/nvptx/test/parallel/flush.c
new file mode 100644
index 0000000..412538b
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/flush.c
@@ -0,0 +1,35 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+int main(int argc, char *argv[]) {
+ int data, out, flag = 0;
+#pragma omp target parallel num_threads(64) map(tofrom \
+ : out, flag) map(to \
+ : data)
+ {
+ if (omp_get_thread_num() == 0) {
+ /* Write to the data buffer that will be read by thread */
+ data = 42;
+/* Flush data to thread 32 */
+#pragma omp flush(data)
+ /* Set flag to release thread 32 */
+#pragma omp atomic write
+ flag = 1;
+ } else if (omp_get_thread_num() == 32) {
+ /* Loop until we see the update to the flag */
+ int val;
+ do {
+#pragma omp atomic read
+ val = flag;
+ } while (val < 1);
+ out = data;
+#pragma omp flush(out)
+ }
+ }
+ // CHECK: out=42.
+ /* Value of out will be 42 */
+ printf("out=%d.\n", out);
+ return !(out == 42);
+}
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/level.c b/final/libomptarget/deviceRTLs/nvptx/test/parallel/level.c
new file mode 100644
index 0000000..edb00e0
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/level.c
@@ -0,0 +1,139 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+const int MaxThreads = 1024;
+const int NumThreads = 64;
+
+int main(int argc, char *argv[]) {
+ int level = -1, activeLevel = -1;
+ // The expected value is -1, initialize to different value.
+ int ancestorTNumNeg = 1, teamSizeNeg = 1;
+ int ancestorTNum0 = -1, teamSize0 = -1;
+ // The expected value is -1, initialize to different value.
+ int ancestorTNum1 = 1, teamSize1 = 1;
+ int check1[MaxThreads];
+ int check2[MaxThreads];
+ int check3[MaxThreads];
+ int check4[MaxThreads];
+ for (int i = 0; i < MaxThreads; i++) {
+ check1[i] = check2[i] = check3[i] = check4[i] = 0;
+ }
+
+ #pragma omp target map(level, activeLevel, ancestorTNumNeg, teamSizeNeg) \
+ map(ancestorTNum0, teamSize0, ancestorTNum1, teamSize1) \
+ map(check1[:], check2[:], check3[:], check4[:])
+ {
+ level = omp_get_level();
+ activeLevel = omp_get_active_level();
+
+ // Expected to return -1.
+ ancestorTNumNeg = omp_get_ancestor_thread_num(-1);
+ teamSizeNeg = omp_get_team_size(-1);
+
+ // Expected to return 0 and 1.
+ ancestorTNum0 = omp_get_ancestor_thread_num(0);
+ teamSize0 = omp_get_team_size(0);
+
+ // Expected to return -1 because the requested level is larger than
+ // the nest level.
+ ancestorTNum1 = omp_get_ancestor_thread_num(1);
+ teamSize1 = omp_get_team_size(1);
+
+ // Expecting active parallel region.
+ #pragma omp parallel num_threads(NumThreads)
+ {
+ int id = omp_get_thread_num();
+ // Multiply return value of omp_get_level by 5 to avoid that this test
+ // passes if both API calls return wrong values.
+ check1[id] += omp_get_level() * 5 + omp_get_active_level();
+
+ // Expected to return 0 and 1.
+ check2[id] += omp_get_ancestor_thread_num(0) + 5 * omp_get_team_size(0);
+ // Expected to return the current thread num.
+ check2[id] += (omp_get_ancestor_thread_num(1) - id);
+ // Exepcted to return the current number of threads.
+ check2[id] += 3 * omp_get_team_size(1);
+ // Expected to return -1, see above.
+ check2[id] += omp_get_ancestor_thread_num(2) + omp_get_team_size(2);
+
+ // Expecting serialized parallel region.
+ #pragma omp parallel
+ {
+ #pragma omp atomic
+ check3[id] += omp_get_level() * 5 + omp_get_active_level();
+
+ // Expected to return 0 and 1.
+ int check4Inc = omp_get_ancestor_thread_num(0) + 5 * omp_get_team_size(0);
+ // Expected to return the parent thread num.
+ check4Inc += (omp_get_ancestor_thread_num(1) - id);
+ // Exepcted to return the number of threads in the active parallel region.
+ check4Inc += 3 * omp_get_team_size(1);
+ // Exptected to return 0 and 1.
+ check4Inc += omp_get_ancestor_thread_num(2) + 3 * omp_get_team_size(2);
+ // Expected to return -1, see above.
+ check4Inc += omp_get_ancestor_thread_num(3) + omp_get_team_size(3);
+
+ #pragma omp atomic
+ check4[id] += check4Inc;
+ }
+ }
+ }
+
+ // CHECK: target: level = 0, activeLevel = 0
+ printf("target: level = %d, activeLevel = %d\n", level, activeLevel);
+ // CHECK: level = -1: ancestorTNum = -1, teamSize = -1
+ printf("level = -1: ancestorTNum = %d, teamSize = %d\n", ancestorTNumNeg, teamSizeNeg);
+ // CHECK: level = 0: ancestorTNum = 0, teamSize = 1
+ printf("level = 0: ancestorTNum = %d, teamSize = %d\n", ancestorTNum0, teamSize0);
+ // CHECK: level = 1: ancestorTNum = -1, teamSize = -1
+ printf("level = 1: ancestorTNum = %d, teamSize = %d\n", ancestorTNum1, teamSize1);
+
+ // CHECK-NOT: invalid
+ for (int i = 0; i < MaxThreads; i++) {
+ // Check active parallel region:
+ // omp_get_level() = 1, omp_get_active_level() = 1
+ const int Expected1 = 6;
+ if (i < NumThreads) {
+ if (check1[i] != Expected1) {
+ printf("invalid: check1[%d] should be %d, is %d\n", i, Expected1, check1[i]);
+ }
+ } else if (check1[i] != 0) {
+ printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]);
+ }
+
+ // 5 * 1 + 3 * 64 - 1 - 1 (see above)
+ const int Expected2 = 195;
+ if (i < NumThreads) {
+ if (check2[i] != Expected2) {
+ printf("invalid: check2[%d] should be %d, is %d\n", i, Expected2, check2[i]);
+ }
+ } else if (check2[i] != 0) {
+ printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]);
+ }
+
+ // Check serialized parallel region:
+ // omp_get_level() = 2, omp_get_active_level() = 1
+ const int Expected3 = 11;
+ if (i < NumThreads) {
+ if (check3[i] != Expected3) {
+ printf("invalid: check3[%d] should be %d, is %d\n", i, Expected3, check3[i]);
+ }
+ } else if (check3[i] != 0) {
+ printf("invalid: check3[%d] should be 0, is %d\n", i, check3[i]);
+ }
+
+ // 5 * 1 + 3 * 64 + 3 * 1 - 1 - 1 (see above)
+ const int Expected4 = 198;
+ if (i < NumThreads) {
+ if (check4[i] != Expected4) {
+ printf("invalid: check4[%d] should be %d, is %d\n", i, Expected4, check4[i]);
+ }
+ } else if (check4[i] != 0) {
+ printf("invalid: check4[%d] should be 0, is %d\n", i, check4[i]);
+ }
+ }
+
+ return 0;
+}
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/nested.c b/final/libomptarget/deviceRTLs/nvptx/test/parallel/nested.c
new file mode 100644
index 0000000..70ebb1d
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/nested.c
@@ -0,0 +1,136 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+const int MaxThreads = 1024;
+const int NumThreads = 64;
+const int NumThreads1 = 1;
+
+int main(int argc, char *argv[]) {
+ int inParallel = -1, numThreads = -1, threadNum = -1;
+ int check1[MaxThreads];
+ int check2[MaxThreads];
+ for (int i = 0; i < MaxThreads; i++) {
+ check1[i] = check2[i] = 0;
+ }
+
+#pragma omp target map(inParallel, numThreads, threadNum, check1[:], check2[:])
+ {
+ inParallel = omp_in_parallel();
+ numThreads = omp_get_num_threads();
+ threadNum = omp_get_thread_num();
+
+// Expecting active parallel region.
+#pragma omp parallel num_threads(NumThreads)
+ {
+ int id = omp_get_thread_num();
+ check1[id] += omp_get_num_threads() + omp_in_parallel();
+
+// Expecting serialized parallel region.
+#pragma omp parallel
+ {
+ // Expected to be 1.
+ int nestedInParallel = omp_in_parallel();
+ // Expected to be 1.
+ int nestedNumThreads = omp_get_num_threads();
+ // Expected to be 0.
+ int nestedThreadNum = omp_get_thread_num();
+#pragma omp atomic
+ check2[id] += nestedInParallel + nestedNumThreads + nestedThreadNum;
+ }
+ }
+ }
+
+ // CHECK: target: inParallel = 0, numThreads = 1, threadNum = 0
+ printf("target: inParallel = %d, numThreads = %d, threadNum = %d\n",
+ inParallel, numThreads, threadNum);
+
+ // CHECK-NOT: invalid
+ for (int i = 0; i < MaxThreads; i++) {
+ // Check that all threads reported
+ // omp_get_num_threads() = 64, omp_in_parallel() = 1.
+ int Expected = NumThreads + 1;
+ if (i < NumThreads) {
+ if (check1[i] != Expected) {
+ printf("invalid: check1[%d] should be %d, is %d\n", i, Expected,
+ check1[i]);
+ }
+ } else if (check1[i] != 0) {
+ printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]);
+ }
+
+ // Check serialized parallel region.
+ if (i < NumThreads) {
+ if (check2[i] != 2) {
+ printf("invalid: check2[%d] should be 2, is %d\n", i, check2[i]);
+ }
+ } else if (check2[i] != 0) {
+ printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]);
+ }
+ }
+
+ inParallel = -1;
+ numThreads = -1;
+ threadNum = -1;
+ for (int i = 0; i < MaxThreads; i++) {
+ check1[i] = check2[i] = 0;
+ }
+
+#pragma omp target map(inParallel, numThreads, threadNum, check1[:], check2[:])
+ {
+ inParallel = omp_in_parallel();
+ numThreads = omp_get_num_threads();
+ threadNum = omp_get_thread_num();
+
+// Expecting active parallel region.
+#pragma omp parallel num_threads(NumThreads1)
+ {
+ int id = omp_get_thread_num();
+ check1[id] += omp_get_num_threads() + omp_in_parallel();
+
+// Expecting serialized parallel region.
+#pragma omp parallel
+ {
+ // Expected to be 0.
+ int nestedInParallel = omp_in_parallel();
+ // Expected to be 1.
+ int nestedNumThreads = omp_get_num_threads();
+ // Expected to be 0.
+ int nestedThreadNum = omp_get_thread_num();
+#pragma omp atomic
+ check2[id] += nestedInParallel + nestedNumThreads + nestedThreadNum;
+ }
+ }
+ }
+
+ // CHECK: target: inParallel = 0, numThreads = 1, threadNum = 0
+ printf("target: inParallel = %d, numThreads = %d, threadNum = %d\n",
+ inParallel, numThreads, threadNum);
+
+ // CHECK-NOT: invalid
+ for (int i = 0; i < MaxThreads; i++) {
+ // Check that all threads reported
+ // omp_get_num_threads() = 1, omp_in_parallel() = 0.
+ int Expected = 1;
+ if (i < NumThreads1) {
+ if (check1[i] != Expected) {
+ printf("invalid: check1[%d] should be %d, is %d\n", i, Expected,
+ check1[i]);
+ }
+ } else if (check1[i] != 0) {
+ printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]);
+ }
+
+ // Check serialized parallel region.
+ if (i < NumThreads1) {
+ if (check2[i] != 1) {
+ printf("invalid: check2[%d] should be 1, is %d\n", i, check2[i]);
+ }
+ } else if (check2[i] != 0) {
+ printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]);
+ }
+ }
+
+ return 0;
+}
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/num_threads.c b/final/libomptarget/deviceRTLs/nvptx/test/parallel/num_threads.c
new file mode 100644
index 0000000..4a2f73f
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/num_threads.c
@@ -0,0 +1,102 @@
+// RUN: %compile-run-and-check
+
+#include <stdio.h>
+#include <omp.h>
+
+const int WarpSize = 32;
+const int NumThreads1 = 1 * WarpSize;
+const int NumThreads2 = 2 * WarpSize;
+const int NumThreads3 = 3 * WarpSize;
+const int MaxThreads = 1024;
+
+int main(int argc, char *argv[]) {
+ int check1[MaxThreads];
+ int check2[MaxThreads];
+ int check3[MaxThreads];
+ int check4[MaxThreads];
+ for (int i = 0; i < MaxThreads; i++) {
+ check1[i] = check2[i] = check3[i] = check4[i] = 0;
+ }
+
+ int maxThreads1 = -1;
+ int maxThreads2 = -1;
+ int maxThreads3 = -1;
+
+ #pragma omp target map(check1[:], check2[:], check3[:], check4[:]) \
+ map(maxThreads1, maxThreads2, maxThreads3)
+ {
+ #pragma omp parallel num_threads(NumThreads1)
+ {
+ check1[omp_get_thread_num()] += omp_get_num_threads();
+ }
+
+ // API method to set number of threads in parallel regions without
+ // num_threads() clause.
+ omp_set_num_threads(NumThreads2);
+ maxThreads1 = omp_get_max_threads();
+ #pragma omp parallel
+ {
+ check2[omp_get_thread_num()] += omp_get_num_threads();
+ }
+
+ maxThreads2 = omp_get_max_threads();
+
+ // num_threads() clause should override nthreads-var ICV.
+ #pragma omp parallel num_threads(NumThreads3)
+ {
+ check3[omp_get_thread_num()] += omp_get_num_threads();
+ }
+
+ maxThreads3 = omp_get_max_threads();
+
+ // Effect from omp_set_num_threads() should still be visible.
+ #pragma omp parallel
+ {
+ check4[omp_get_thread_num()] += omp_get_num_threads();
+ }
+ }
+
+ // CHECK: maxThreads1 = 64
+ printf("maxThreads1 = %d\n", maxThreads1);
+ // CHECK: maxThreads2 = 64
+ printf("maxThreads2 = %d\n", maxThreads2);
+ // CHECK: maxThreads3 = 64
+ printf("maxThreads3 = %d\n", maxThreads3);
+
+ // CHECK-NOT: invalid
+ for (int i = 0; i < MaxThreads; i++) {
+ if (i < NumThreads1) {
+ if (check1[i] != NumThreads1) {
+ printf("invalid: check1[%d] should be %d, is %d\n", i, NumThreads1, check1[i]);
+ }
+ } else if (check1[i] != 0) {
+ printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]);
+ }
+
+ if (i < NumThreads2) {
+ if (check2[i] != NumThreads2) {
+ printf("invalid: check2[%d] should be %d, is %d\n", i, NumThreads2, check2[i]);
+ }
+ } else if (check2[i] != 0) {
+ printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]);
+ }
+
+ if (i < NumThreads3) {
+ if (check3[i] != NumThreads3) {
+ printf("invalid: check3[%d] should be %d, is %d\n", i, NumThreads3, check3[i]);
+ }
+ } else if (check3[i] != 0) {
+ printf("invalid: check3[%d] should be 0, is %d\n", i, check3[i]);
+ }
+
+ if (i < NumThreads2) {
+ if (check4[i] != NumThreads2) {
+ printf("invalid: check4[%d] should be %d, is %d\n", i, NumThreads2, check4[i]);
+ }
+ } else if (check4[i] != 0) {
+ printf("invalid: check4[%d] should be 0, is %d\n", i, check4[i]);
+ }
+ }
+
+ return 0;
+}
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp b/final/libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp
new file mode 100644
index 0000000..517db59
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp
@@ -0,0 +1,51 @@
+// RUN: %compilexx-run-and-check
+
+#include <stdio.h>
+#include <omp.h>
+
+int main(void) {
+ int isHost = -1;
+ int ParallelLevel1 = -1, ParallelLevel2 = -1;
+ int Count = 0;
+
+#pragma omp target parallel for map(tofrom \
+ : isHost, ParallelLevel1, ParallelLevel2), reduction(+: Count) schedule(static, 1)
+ for (int J = 0; J < 10; ++J) {
+#pragma omp critical
+ {
+ isHost = (isHost < 0 || isHost == 0) ? omp_is_initial_device() : isHost;
+ ParallelLevel1 = (ParallelLevel1 < 0 || ParallelLevel1 == 1)
+ ? omp_get_level()
+ : ParallelLevel1;
+ }
+ if (omp_get_thread_num() > 5) {
+ int L2;
+#pragma omp parallel for schedule(dynamic) lastprivate(L2) reduction(+: Count)
+ for (int I = 0; I < 10; ++I) {
+ L2 = omp_get_level();
+ Count += omp_get_level(); // (10-6)*10*2 = 80
+ }
+#pragma omp critical
+ ParallelLevel2 =
+ (ParallelLevel2 < 0 || ParallelLevel2 == 2) ? L2 : ParallelLevel2;
+ } else {
+ Count += omp_get_level(); // 6 * 1 = 6
+ }
+ }
+
+ if (isHost < 0) {
+ printf("Runtime error, isHost=%d\n", isHost);
+ }
+
+ // CHECK: Target region executed on the device
+ printf("Target region executed on the %s\n", isHost ? "host" : "device");
+ // CHECK: Parallel level in SPMD mode: L1 is 1, L2 is 2
+ printf("Parallel level in SPMD mode: L1 is %d, L2 is %d\n", ParallelLevel1,
+ ParallelLevel2);
+ // Final result of Count is (10-6)(num of loops)*10(num of iterations)*2(par
+ // level) + 6(num of iterations) * 1(par level)
+ // CHECK: Expected count = 86
+ printf("Expected count = %d\n", Count);
+
+ return isHost;
+}
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/thread_limit.c b/final/libomptarget/deviceRTLs/nvptx/test/parallel/thread_limit.c
new file mode 100644
index 0000000..5e40bb5
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/thread_limit.c
@@ -0,0 +1,77 @@
+// RUN: %compile-run-and-check
+
+#include <stdio.h>
+#include <omp.h>
+
+const int WarpSize = 32;
+const int ThreadLimit = 1 * WarpSize;
+const int NumThreads2 = 2 * WarpSize;
+const int NumThreads3 = 3 * WarpSize;
+const int MaxThreads = 1024;
+
+int main(int argc, char *argv[]) {
+ int check1[MaxThreads];
+ int check2[MaxThreads];
+ int check3[MaxThreads];
+ for (int i = 0; i < MaxThreads; i++) {
+ check1[i] = check2[i] = check3[i] = 0;
+ }
+
+ int threadLimit = -1;
+
+ #pragma omp target teams num_teams(1) thread_limit(ThreadLimit) \
+ map(check1[:], check2[:], check3[:], threadLimit)
+ {
+ threadLimit = omp_get_thread_limit();
+
+ // All parallel regions should get as many threads as specified by the
+ // thread_limit() clause.
+ #pragma omp parallel
+ {
+ check1[omp_get_thread_num()] += omp_get_num_threads();
+ }
+
+ omp_set_num_threads(NumThreads2);
+ #pragma omp parallel
+ {
+ check2[omp_get_thread_num()] += omp_get_num_threads();
+ }
+
+ #pragma omp parallel num_threads(NumThreads3)
+ {
+ check3[omp_get_thread_num()] += omp_get_num_threads();
+ }
+ }
+
+ // CHECK: threadLimit = 32
+ printf("threadLimit = %d\n", threadLimit);
+
+ // CHECK-NOT: invalid
+ for (int i = 0; i < MaxThreads; i++) {
+ if (i < ThreadLimit) {
+ if (check1[i] != ThreadLimit) {
+ printf("invalid: check1[%d] should be %d, is %d\n", i, ThreadLimit, check1[i]);
+ }
+ } else if (check1[i] != 0) {
+ printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]);
+ }
+
+ if (i < ThreadLimit) {
+ if (check2[i] != ThreadLimit) {
+ printf("invalid: check2[%d] should be %d, is %d\n", i, ThreadLimit, check2[i]);
+ }
+ } else if (check2[i] != 0) {
+ printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]);
+ }
+
+ if (i < ThreadLimit) {
+ if (check3[i] != ThreadLimit) {
+ printf("invalid: check3[%d] should be %d, is %d\n", i, ThreadLimit, check3[i]);
+ }
+ } else if (check3[i] != 0) {
+ printf("invalid: check3[%d] should be 0, is %d\n", i, check3[i]);
+ }
+ }
+
+ return 0;
+}
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c b/final/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c
new file mode 100644
index 0000000..b3f8768
--- /dev/null
+++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c
@@ -0,0 +1,22 @@
+// RUN: %compile-run-and-check
+
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+ int res = 0;
+
+#pragma omp parallel num_threads(2) reduction(+:res)
+ {
+ int tid = omp_get_thread_num();
+#pragma omp target teams distribute reduction(+:res)
+ for (int i = tid; i < 2; i++)
+ ++res;
+ }
+ // The first thread makes 2 iterations, the second - 1. Expected result of the
+ // reduction res is 3.
+
+ // CHECK: res = 3.
+ printf("res = %d.\n", res);
+ return 0;
+}