diff options
Diffstat (limited to 'final/libomptarget/deviceRTLs/nvptx/test')
14 files changed, 882 insertions, 0 deletions
diff --git a/final/libomptarget/deviceRTLs/nvptx/test/CMakeLists.txt b/final/libomptarget/deviceRTLs/nvptx/test/CMakeLists.txt new file mode 100644 index 0000000..33945d1 --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/CMakeLists.txt @@ -0,0 +1,26 @@ +if(NOT OPENMP_TEST_COMPILER_ID STREQUAL "Clang") + # Silently return, no need to annoy the user. + return() +endif() + +set(deps omptarget-nvptx omptarget omp) +if(LIBOMPTARGET_NVPTX_ENABLE_BCLIB) + set(deps ${deps} omptarget-nvptx-bc) +endif() + +# Don't run by default. +set(EXCLUDE_FROM_ALL True) +# Run with only one thread to only launch one application to the GPU at a time. +add_openmp_testsuite(check-libomptarget-nvptx + "Running libomptarget-nvptx tests" ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${deps} ARGS -j1) + +set(LIBOMPTARGET_NVPTX_TEST_FLAGS "" CACHE STRING + "Extra compiler flags to send to the test compiler.") +set(LIBOMPTARGET_NVPTX_TEST_OPENMP_FLAGS + "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda" CACHE STRING + "OpenMP compiler flags to use for testing libomptarget-nvptx.") + +# Configure the lit.site.cfg.in file +set(AUTO_GEN_COMMENT "## Autogenerated by libomptarget-nvptx configuration.\n# Do not edit!") +configure_file(lit.site.cfg.in lit.site.cfg @ONLY) diff --git a/final/libomptarget/deviceRTLs/nvptx/test/api/ignored.c b/final/libomptarget/deviceRTLs/nvptx/test/api/ignored.c new file mode 100644 index 0000000..1fa9ae0 --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/api/ignored.c @@ -0,0 +1,38 @@ +// RUN: %compile-run-and-check + +#include <omp.h> +#include <stdio.h> + +const int MaxThreads = 1024; + +int main(int argc, char *argv[]) { + int cancellation = -1, dynamic = -1, nested = -1, maxActiveLevels = -1; + + #pragma omp target map(cancellation, dynamic, nested, maxActiveLevels) + { + // libomptarget-nvptx doesn't support cancellation. + cancellation = omp_get_cancellation(); + + // No support for dynamic adjustment of the number of threads. + omp_set_dynamic(1); + dynamic = omp_get_dynamic(); + + // libomptarget-nvptx doesn't support nested parallelism. + omp_set_nested(1); + nested = omp_get_nested(); + + omp_set_max_active_levels(42); + maxActiveLevels = omp_get_max_active_levels(); + } + + // CHECK: cancellation = 0 + printf("cancellation = %d\n", cancellation); + // CHECK: dynamic = 0 + printf("dynamic = %d\n", dynamic); + // CHECK: nested = 0 + printf("nested = %d\n", nested); + // CHECK: maxActiveLevels = 1 + printf("maxActiveLevels = %d\n", maxActiveLevels); + + return 0; +} diff --git a/final/libomptarget/deviceRTLs/nvptx/test/api/max_threads.c b/final/libomptarget/deviceRTLs/nvptx/test/api/max_threads.c new file mode 100644 index 0000000..d0d9f31 --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/api/max_threads.c @@ -0,0 +1,46 @@ +// RUN: %compile-run-and-check + +#include <omp.h> +#include <stdio.h> + +int main(int argc, char *argv[]) { + int MaxThreadsL1 = -1, MaxThreadsL2 = -1; + +#pragma omp declare reduction(unique:int \ + : omp_out = (omp_in == 1 ? omp_in : omp_out)) \ + initializer(omp_priv = -1) + + // Non-SPMD mode. +#pragma omp target teams map(MaxThreadsL1, MaxThreadsL2) thread_limit(32) \ + num_teams(1) + { + MaxThreadsL1 = omp_get_max_threads(); +#pragma omp parallel reduction(unique : MaxThreadsL2) + { MaxThreadsL2 = omp_get_max_threads(); } + } + + // CHECK: Non-SPMD MaxThreadsL1 = 32 + printf("Non-SPMD MaxThreadsL1 = %d\n", MaxThreadsL1); + // CHECK: Non-SPMD MaxThreadsL2 = 1 + printf("Non-SPMD MaxThreadsL2 = %d\n", MaxThreadsL2); + + // SPMD mode with full runtime + MaxThreadsL2 = -1; +#pragma omp target parallel reduction(unique : MaxThreadsL2) + { MaxThreadsL2 = omp_get_max_threads(); } + + // CHECK: SPMD with full runtime MaxThreadsL2 = 1 + printf("SPMD with full runtime MaxThreadsL2 = %d\n", MaxThreadsL2); + + // SPMD mode without runtime + MaxThreadsL2 = -1; +#pragma omp target parallel for reduction(unique : MaxThreadsL2) + for (int I = 0; I < 2; ++I) { + MaxThreadsL2 = omp_get_max_threads(); + } + + // CHECK: SPMD without runtime MaxThreadsL2 = 1 + printf("SPMD without runtime MaxThreadsL2 = %d\n", MaxThreadsL2); + + return 0; +} diff --git a/final/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c b/final/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c new file mode 100644 index 0000000..626d620 --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/api/thread_limit.c @@ -0,0 +1,72 @@ +// RUN: %compile-run-and-check + +#include <omp.h> +#include <stdio.h> + +int main(int argc, char *argv[]) { + int ThreadLimitL0 = -1, ThreadLimitL1 = -1, ThreadLimitL2 = -1; + +#pragma omp declare reduction(unique64:int \ + : omp_out = (omp_in == 64 ? omp_in : omp_out)) \ + initializer(omp_priv = -1) +#pragma omp declare reduction(unique32:int \ + : omp_out = (omp_in == 32 ? omp_in : omp_out)) \ + initializer(omp_priv = -1) + + // Non-SPMD mode. +#pragma omp target teams map(ThreadLimitL0, ThreadLimitL1, ThreadLimitL2) \ + thread_limit(64) num_teams(1) + { + ThreadLimitL0 = omp_get_thread_limit(); +#pragma omp parallel reduction(unique64 \ + : ThreadLimitL1, ThreadLimitL2) num_threads(32) + { + ThreadLimitL1 = omp_get_thread_limit(); +#pragma omp parallel reduction(unique64 : ThreadLimitL2) + { ThreadLimitL2 = omp_get_thread_limit(); } + } + } + + // CHECK: Non-SPMD ThreadLimitL0 = 64 + printf("Non-SPMD ThreadLimitL0 = %d\n", ThreadLimitL0); + // CHECK: Non-SPMD ThreadLimitL1 = 64 + printf("Non-SPMD ThreadLimitL1 = %d\n", ThreadLimitL1); + // CHECK: Non-SPMD ThreadLimitL2 = 64 + printf("Non-SPMD ThreadLimitL2 = %d\n", ThreadLimitL2); + + // SPMD mode with full runtime + ThreadLimitL1 = -1; + ThreadLimitL2 = -1; +#pragma omp target parallel reduction(unique32 \ + : ThreadLimitL1, ThreadLimitL2) \ + num_threads(32) + { + ThreadLimitL1 = omp_get_thread_limit(); +#pragma omp parallel reduction(unique32 : ThreadLimitL2) + { ThreadLimitL2 = omp_get_thread_limit(); } + } + + // CHECK: SPMD with full runtime ThreadLimitL1 = 32 + printf("SPMD with full runtime ThreadLimitL1 = %d\n", ThreadLimitL1); + // CHECK: SPMD with full runtime ThreadLimitL2 = 32 + printf("SPMD with full runtime ThreadLimitL2 = %d\n", ThreadLimitL2); + + // SPMD mode without runtime + ThreadLimitL1 = -1; + ThreadLimitL2 = -1; +#pragma omp target parallel for reduction(unique32 \ + : ThreadLimitL1, ThreadLimitL2) \ + num_threads(32) + for (int I = 0; I < 2; ++I) { + ThreadLimitL1 = omp_get_thread_limit(); +#pragma omp parallel reduction(unique32 : ThreadLimitL2) + { ThreadLimitL2 = omp_get_thread_limit(); } + } + + // CHECK: SPMD without runtime ThreadLimitL1 = 32 + printf("SPMD without runtime ThreadLimitL1 = %d\n", ThreadLimitL1); + // CHECK: SPMD without runtime ThreadLimitL2 = 32 + printf("SPMD without runtime ThreadLimitL2 = %d\n", ThreadLimitL2); + + return 0; +} diff --git a/final/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c b/final/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c new file mode 100644 index 0000000..dd17ae7 --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/data_sharing/alignment.c @@ -0,0 +1,55 @@ +// RUN: %compile-run-and-check + +#include <omp.h> +#include <stdio.h> + +#pragma omp declare target +static void putValueInParallel(int *ptr, int value) { + #pragma omp parallel + { + *ptr = value; + } +} + +static int getId() { + int id; + putValueInParallel(&id, omp_get_thread_num()); + return id; +} +#pragma omp end declare target + +const int MaxThreads = 1024; +const int Threads = 64; + +int main(int argc, char *argv[]) { + int master; + int check[MaxThreads]; + for (int i = 0; i < MaxThreads; i++) { + check[i] = 0; + } + + #pragma omp target map(master, check[:]) + { + master = getId(); + + #pragma omp parallel num_threads(Threads) + { + check[omp_get_thread_num()] = getId(); + } + } + + // CHECK: master = 0. + printf("master = %d.\n", master); + // CHECK-NOT: invalid + for (int i = 0; i < MaxThreads; i++) { + if (i < Threads) { + if (check[i] != i) { + printf("invalid: check[%d] should be %d, is %d\n", i, i, check[i]); + } + } else if (check[i] != 0) { + printf("invalid: check[%d] should be 0, is %d\n", i, check[i]); + } + } + + return 0; +} diff --git a/final/libomptarget/deviceRTLs/nvptx/test/lit.cfg b/final/libomptarget/deviceRTLs/nvptx/test/lit.cfg new file mode 100644 index 0000000..0774c25 --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/lit.cfg @@ -0,0 +1,69 @@ +# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79: +# Configuration file for the 'lit' test runner. + +import os +import lit.formats + +# Tell pylint that we know config and lit_config exist somewhere. +if 'PYLINT_IMPORT' in os.environ: + config = object() + lit_config = object() + +def prepend_library_path(name, value, sep): + if name in config.environment: + config.environment[name] = value + sep + config.environment[name] + else: + config.environment[name] = value + +# name: The name of this test suite. +config.name = 'libomptarget-nvptx' + +# suffixes: A list of file extensions to treat as test files. +config.suffixes = ['.c', '.cpp', '.cc'] + +# test_source_root: The root path where tests are located. +config.test_source_root = os.path.dirname(__file__) + +# test_exec_root: The root object directory where output is placed +config.test_exec_root = config.binary_dir + +# test format +config.test_format = lit.formats.ShTest() + +# compiler flags +config.test_flags = " -I " + config.omp_header_directory + \ + " -L " + config.library_dir + \ + " --libomptarget-nvptx-path=" + config.library_dir; + +if config.omp_host_rtl_directory: + config.test_flags = config.test_flags + \ + " -L " + config.omp_host_rtl_directory + +config.test_flags = config.test_flags + " " + config.test_extra_flags + +# Setup environment to find dynamic library at runtime. +prepend_library_path('LD_LIBRARY_PATH', config.library_dir, ":") +prepend_library_path('LD_LIBRARY_PATH', config.omp_host_rtl_directory, ":") + +# Forbid fallback to host. +config.environment["OMP_TARGET_OFFLOAD"] = "MANDATORY" + +# substitutions +config.substitutions.append(("%compilexx-run-and-check", + "%compilexx-and-run | " + config.libomptarget_filecheck + " %s")) +config.substitutions.append(("%compile-run-and-check", + "%compile-and-run | " + config.libomptarget_filecheck + " %s")) +config.substitutions.append(("%compilexx-and-run", "%compilexx && %run")) +config.substitutions.append(("%compile-and-run", "%compile && %run")) + +config.substitutions.append(("%compilexx", + "%clangxx %openmp_flags %flags %s -o %t")) +config.substitutions.append(("%compile", + "%clang %openmp_flags %flags %s -o %t")) + +config.substitutions.append(("%clangxx", config.test_cxx_compiler)) +config.substitutions.append(("%clang", config.test_c_compiler)) +config.substitutions.append(("%openmp_flags", config.test_openmp_flags)) +config.substitutions.append(("%flags", config.test_flags)) + +config.substitutions.append(("%run", "%t")) diff --git a/final/libomptarget/deviceRTLs/nvptx/test/lit.site.cfg.in b/final/libomptarget/deviceRTLs/nvptx/test/lit.site.cfg.in new file mode 100644 index 0000000..d9c14cb --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/lit.site.cfg.in @@ -0,0 +1,14 @@ +@AUTO_GEN_COMMENT@ + +config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@" +config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@" +config.test_openmp_flags = "@LIBOMPTARGET_NVPTX_TEST_OPENMP_FLAGS@" +config.test_extra_flags = "@LIBOMPTARGET_NVPTX_TEST_FLAGS@" +config.binary_dir = "@CMAKE_CURRENT_BINARY_DIR@" +config.library_dir = "@LIBOMPTARGET_LIBRARY_DIR@" +config.omp_header_directory = "@LIBOMPTARGET_OPENMP_HEADER_FOLDER@" +config.omp_host_rtl_directory = "@LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER@" +config.libomptarget_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@" + +# Let the main config do the real work. +lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg") diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/flush.c b/final/libomptarget/deviceRTLs/nvptx/test/parallel/flush.c new file mode 100644 index 0000000..412538b --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/flush.c @@ -0,0 +1,35 @@ +// RUN: %compile-run-and-check + +#include <omp.h> +#include <stdio.h> + +int main(int argc, char *argv[]) { + int data, out, flag = 0; +#pragma omp target parallel num_threads(64) map(tofrom \ + : out, flag) map(to \ + : data) + { + if (omp_get_thread_num() == 0) { + /* Write to the data buffer that will be read by thread */ + data = 42; +/* Flush data to thread 32 */ +#pragma omp flush(data) + /* Set flag to release thread 32 */ +#pragma omp atomic write + flag = 1; + } else if (omp_get_thread_num() == 32) { + /* Loop until we see the update to the flag */ + int val; + do { +#pragma omp atomic read + val = flag; + } while (val < 1); + out = data; +#pragma omp flush(out) + } + } + // CHECK: out=42. + /* Value of out will be 42 */ + printf("out=%d.\n", out); + return !(out == 42); +} diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/level.c b/final/libomptarget/deviceRTLs/nvptx/test/parallel/level.c new file mode 100644 index 0000000..edb00e0 --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/level.c @@ -0,0 +1,139 @@ +// RUN: %compile-run-and-check + +#include <omp.h> +#include <stdio.h> + +const int MaxThreads = 1024; +const int NumThreads = 64; + +int main(int argc, char *argv[]) { + int level = -1, activeLevel = -1; + // The expected value is -1, initialize to different value. + int ancestorTNumNeg = 1, teamSizeNeg = 1; + int ancestorTNum0 = -1, teamSize0 = -1; + // The expected value is -1, initialize to different value. + int ancestorTNum1 = 1, teamSize1 = 1; + int check1[MaxThreads]; + int check2[MaxThreads]; + int check3[MaxThreads]; + int check4[MaxThreads]; + for (int i = 0; i < MaxThreads; i++) { + check1[i] = check2[i] = check3[i] = check4[i] = 0; + } + + #pragma omp target map(level, activeLevel, ancestorTNumNeg, teamSizeNeg) \ + map(ancestorTNum0, teamSize0, ancestorTNum1, teamSize1) \ + map(check1[:], check2[:], check3[:], check4[:]) + { + level = omp_get_level(); + activeLevel = omp_get_active_level(); + + // Expected to return -1. + ancestorTNumNeg = omp_get_ancestor_thread_num(-1); + teamSizeNeg = omp_get_team_size(-1); + + // Expected to return 0 and 1. + ancestorTNum0 = omp_get_ancestor_thread_num(0); + teamSize0 = omp_get_team_size(0); + + // Expected to return -1 because the requested level is larger than + // the nest level. + ancestorTNum1 = omp_get_ancestor_thread_num(1); + teamSize1 = omp_get_team_size(1); + + // Expecting active parallel region. + #pragma omp parallel num_threads(NumThreads) + { + int id = omp_get_thread_num(); + // Multiply return value of omp_get_level by 5 to avoid that this test + // passes if both API calls return wrong values. + check1[id] += omp_get_level() * 5 + omp_get_active_level(); + + // Expected to return 0 and 1. + check2[id] += omp_get_ancestor_thread_num(0) + 5 * omp_get_team_size(0); + // Expected to return the current thread num. + check2[id] += (omp_get_ancestor_thread_num(1) - id); + // Exepcted to return the current number of threads. + check2[id] += 3 * omp_get_team_size(1); + // Expected to return -1, see above. + check2[id] += omp_get_ancestor_thread_num(2) + omp_get_team_size(2); + + // Expecting serialized parallel region. + #pragma omp parallel + { + #pragma omp atomic + check3[id] += omp_get_level() * 5 + omp_get_active_level(); + + // Expected to return 0 and 1. + int check4Inc = omp_get_ancestor_thread_num(0) + 5 * omp_get_team_size(0); + // Expected to return the parent thread num. + check4Inc += (omp_get_ancestor_thread_num(1) - id); + // Exepcted to return the number of threads in the active parallel region. + check4Inc += 3 * omp_get_team_size(1); + // Exptected to return 0 and 1. + check4Inc += omp_get_ancestor_thread_num(2) + 3 * omp_get_team_size(2); + // Expected to return -1, see above. + check4Inc += omp_get_ancestor_thread_num(3) + omp_get_team_size(3); + + #pragma omp atomic + check4[id] += check4Inc; + } + } + } + + // CHECK: target: level = 0, activeLevel = 0 + printf("target: level = %d, activeLevel = %d\n", level, activeLevel); + // CHECK: level = -1: ancestorTNum = -1, teamSize = -1 + printf("level = -1: ancestorTNum = %d, teamSize = %d\n", ancestorTNumNeg, teamSizeNeg); + // CHECK: level = 0: ancestorTNum = 0, teamSize = 1 + printf("level = 0: ancestorTNum = %d, teamSize = %d\n", ancestorTNum0, teamSize0); + // CHECK: level = 1: ancestorTNum = -1, teamSize = -1 + printf("level = 1: ancestorTNum = %d, teamSize = %d\n", ancestorTNum1, teamSize1); + + // CHECK-NOT: invalid + for (int i = 0; i < MaxThreads; i++) { + // Check active parallel region: + // omp_get_level() = 1, omp_get_active_level() = 1 + const int Expected1 = 6; + if (i < NumThreads) { + if (check1[i] != Expected1) { + printf("invalid: check1[%d] should be %d, is %d\n", i, Expected1, check1[i]); + } + } else if (check1[i] != 0) { + printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]); + } + + // 5 * 1 + 3 * 64 - 1 - 1 (see above) + const int Expected2 = 195; + if (i < NumThreads) { + if (check2[i] != Expected2) { + printf("invalid: check2[%d] should be %d, is %d\n", i, Expected2, check2[i]); + } + } else if (check2[i] != 0) { + printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]); + } + + // Check serialized parallel region: + // omp_get_level() = 2, omp_get_active_level() = 1 + const int Expected3 = 11; + if (i < NumThreads) { + if (check3[i] != Expected3) { + printf("invalid: check3[%d] should be %d, is %d\n", i, Expected3, check3[i]); + } + } else if (check3[i] != 0) { + printf("invalid: check3[%d] should be 0, is %d\n", i, check3[i]); + } + + // 5 * 1 + 3 * 64 + 3 * 1 - 1 - 1 (see above) + const int Expected4 = 198; + if (i < NumThreads) { + if (check4[i] != Expected4) { + printf("invalid: check4[%d] should be %d, is %d\n", i, Expected4, check4[i]); + } + } else if (check4[i] != 0) { + printf("invalid: check4[%d] should be 0, is %d\n", i, check4[i]); + } + } + + return 0; +} diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/nested.c b/final/libomptarget/deviceRTLs/nvptx/test/parallel/nested.c new file mode 100644 index 0000000..70ebb1d --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/nested.c @@ -0,0 +1,136 @@ +// RUN: %compile-run-and-check + +#include <omp.h> +#include <stdio.h> + +const int MaxThreads = 1024; +const int NumThreads = 64; +const int NumThreads1 = 1; + +int main(int argc, char *argv[]) { + int inParallel = -1, numThreads = -1, threadNum = -1; + int check1[MaxThreads]; + int check2[MaxThreads]; + for (int i = 0; i < MaxThreads; i++) { + check1[i] = check2[i] = 0; + } + +#pragma omp target map(inParallel, numThreads, threadNum, check1[:], check2[:]) + { + inParallel = omp_in_parallel(); + numThreads = omp_get_num_threads(); + threadNum = omp_get_thread_num(); + +// Expecting active parallel region. +#pragma omp parallel num_threads(NumThreads) + { + int id = omp_get_thread_num(); + check1[id] += omp_get_num_threads() + omp_in_parallel(); + +// Expecting serialized parallel region. +#pragma omp parallel + { + // Expected to be 1. + int nestedInParallel = omp_in_parallel(); + // Expected to be 1. + int nestedNumThreads = omp_get_num_threads(); + // Expected to be 0. + int nestedThreadNum = omp_get_thread_num(); +#pragma omp atomic + check2[id] += nestedInParallel + nestedNumThreads + nestedThreadNum; + } + } + } + + // CHECK: target: inParallel = 0, numThreads = 1, threadNum = 0 + printf("target: inParallel = %d, numThreads = %d, threadNum = %d\n", + inParallel, numThreads, threadNum); + + // CHECK-NOT: invalid + for (int i = 0; i < MaxThreads; i++) { + // Check that all threads reported + // omp_get_num_threads() = 64, omp_in_parallel() = 1. + int Expected = NumThreads + 1; + if (i < NumThreads) { + if (check1[i] != Expected) { + printf("invalid: check1[%d] should be %d, is %d\n", i, Expected, + check1[i]); + } + } else if (check1[i] != 0) { + printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]); + } + + // Check serialized parallel region. + if (i < NumThreads) { + if (check2[i] != 2) { + printf("invalid: check2[%d] should be 2, is %d\n", i, check2[i]); + } + } else if (check2[i] != 0) { + printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]); + } + } + + inParallel = -1; + numThreads = -1; + threadNum = -1; + for (int i = 0; i < MaxThreads; i++) { + check1[i] = check2[i] = 0; + } + +#pragma omp target map(inParallel, numThreads, threadNum, check1[:], check2[:]) + { + inParallel = omp_in_parallel(); + numThreads = omp_get_num_threads(); + threadNum = omp_get_thread_num(); + +// Expecting active parallel region. +#pragma omp parallel num_threads(NumThreads1) + { + int id = omp_get_thread_num(); + check1[id] += omp_get_num_threads() + omp_in_parallel(); + +// Expecting serialized parallel region. +#pragma omp parallel + { + // Expected to be 0. + int nestedInParallel = omp_in_parallel(); + // Expected to be 1. + int nestedNumThreads = omp_get_num_threads(); + // Expected to be 0. + int nestedThreadNum = omp_get_thread_num(); +#pragma omp atomic + check2[id] += nestedInParallel + nestedNumThreads + nestedThreadNum; + } + } + } + + // CHECK: target: inParallel = 0, numThreads = 1, threadNum = 0 + printf("target: inParallel = %d, numThreads = %d, threadNum = %d\n", + inParallel, numThreads, threadNum); + + // CHECK-NOT: invalid + for (int i = 0; i < MaxThreads; i++) { + // Check that all threads reported + // omp_get_num_threads() = 1, omp_in_parallel() = 0. + int Expected = 1; + if (i < NumThreads1) { + if (check1[i] != Expected) { + printf("invalid: check1[%d] should be %d, is %d\n", i, Expected, + check1[i]); + } + } else if (check1[i] != 0) { + printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]); + } + + // Check serialized parallel region. + if (i < NumThreads1) { + if (check2[i] != 1) { + printf("invalid: check2[%d] should be 1, is %d\n", i, check2[i]); + } + } else if (check2[i] != 0) { + printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]); + } + } + + return 0; +} diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/num_threads.c b/final/libomptarget/deviceRTLs/nvptx/test/parallel/num_threads.c new file mode 100644 index 0000000..4a2f73f --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/num_threads.c @@ -0,0 +1,102 @@ +// RUN: %compile-run-and-check + +#include <stdio.h> +#include <omp.h> + +const int WarpSize = 32; +const int NumThreads1 = 1 * WarpSize; +const int NumThreads2 = 2 * WarpSize; +const int NumThreads3 = 3 * WarpSize; +const int MaxThreads = 1024; + +int main(int argc, char *argv[]) { + int check1[MaxThreads]; + int check2[MaxThreads]; + int check3[MaxThreads]; + int check4[MaxThreads]; + for (int i = 0; i < MaxThreads; i++) { + check1[i] = check2[i] = check3[i] = check4[i] = 0; + } + + int maxThreads1 = -1; + int maxThreads2 = -1; + int maxThreads3 = -1; + + #pragma omp target map(check1[:], check2[:], check3[:], check4[:]) \ + map(maxThreads1, maxThreads2, maxThreads3) + { + #pragma omp parallel num_threads(NumThreads1) + { + check1[omp_get_thread_num()] += omp_get_num_threads(); + } + + // API method to set number of threads in parallel regions without + // num_threads() clause. + omp_set_num_threads(NumThreads2); + maxThreads1 = omp_get_max_threads(); + #pragma omp parallel + { + check2[omp_get_thread_num()] += omp_get_num_threads(); + } + + maxThreads2 = omp_get_max_threads(); + + // num_threads() clause should override nthreads-var ICV. + #pragma omp parallel num_threads(NumThreads3) + { + check3[omp_get_thread_num()] += omp_get_num_threads(); + } + + maxThreads3 = omp_get_max_threads(); + + // Effect from omp_set_num_threads() should still be visible. + #pragma omp parallel + { + check4[omp_get_thread_num()] += omp_get_num_threads(); + } + } + + // CHECK: maxThreads1 = 64 + printf("maxThreads1 = %d\n", maxThreads1); + // CHECK: maxThreads2 = 64 + printf("maxThreads2 = %d\n", maxThreads2); + // CHECK: maxThreads3 = 64 + printf("maxThreads3 = %d\n", maxThreads3); + + // CHECK-NOT: invalid + for (int i = 0; i < MaxThreads; i++) { + if (i < NumThreads1) { + if (check1[i] != NumThreads1) { + printf("invalid: check1[%d] should be %d, is %d\n", i, NumThreads1, check1[i]); + } + } else if (check1[i] != 0) { + printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]); + } + + if (i < NumThreads2) { + if (check2[i] != NumThreads2) { + printf("invalid: check2[%d] should be %d, is %d\n", i, NumThreads2, check2[i]); + } + } else if (check2[i] != 0) { + printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]); + } + + if (i < NumThreads3) { + if (check3[i] != NumThreads3) { + printf("invalid: check3[%d] should be %d, is %d\n", i, NumThreads3, check3[i]); + } + } else if (check3[i] != 0) { + printf("invalid: check3[%d] should be 0, is %d\n", i, check3[i]); + } + + if (i < NumThreads2) { + if (check4[i] != NumThreads2) { + printf("invalid: check4[%d] should be %d, is %d\n", i, NumThreads2, check4[i]); + } + } else if (check4[i] != 0) { + printf("invalid: check4[%d] should be 0, is %d\n", i, check4[i]); + } + } + + return 0; +} diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp b/final/libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp new file mode 100644 index 0000000..517db59 --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/spmd_parallel_regions.cpp @@ -0,0 +1,51 @@ +// RUN: %compilexx-run-and-check + +#include <stdio.h> +#include <omp.h> + +int main(void) { + int isHost = -1; + int ParallelLevel1 = -1, ParallelLevel2 = -1; + int Count = 0; + +#pragma omp target parallel for map(tofrom \ + : isHost, ParallelLevel1, ParallelLevel2), reduction(+: Count) schedule(static, 1) + for (int J = 0; J < 10; ++J) { +#pragma omp critical + { + isHost = (isHost < 0 || isHost == 0) ? omp_is_initial_device() : isHost; + ParallelLevel1 = (ParallelLevel1 < 0 || ParallelLevel1 == 1) + ? omp_get_level() + : ParallelLevel1; + } + if (omp_get_thread_num() > 5) { + int L2; +#pragma omp parallel for schedule(dynamic) lastprivate(L2) reduction(+: Count) + for (int I = 0; I < 10; ++I) { + L2 = omp_get_level(); + Count += omp_get_level(); // (10-6)*10*2 = 80 + } +#pragma omp critical + ParallelLevel2 = + (ParallelLevel2 < 0 || ParallelLevel2 == 2) ? L2 : ParallelLevel2; + } else { + Count += omp_get_level(); // 6 * 1 = 6 + } + } + + if (isHost < 0) { + printf("Runtime error, isHost=%d\n", isHost); + } + + // CHECK: Target region executed on the device + printf("Target region executed on the %s\n", isHost ? "host" : "device"); + // CHECK: Parallel level in SPMD mode: L1 is 1, L2 is 2 + printf("Parallel level in SPMD mode: L1 is %d, L2 is %d\n", ParallelLevel1, + ParallelLevel2); + // Final result of Count is (10-6)(num of loops)*10(num of iterations)*2(par + // level) + 6(num of iterations) * 1(par level) + // CHECK: Expected count = 86 + printf("Expected count = %d\n", Count); + + return isHost; +} diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/thread_limit.c b/final/libomptarget/deviceRTLs/nvptx/test/parallel/thread_limit.c new file mode 100644 index 0000000..5e40bb5 --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/thread_limit.c @@ -0,0 +1,77 @@ +// RUN: %compile-run-and-check + +#include <stdio.h> +#include <omp.h> + +const int WarpSize = 32; +const int ThreadLimit = 1 * WarpSize; +const int NumThreads2 = 2 * WarpSize; +const int NumThreads3 = 3 * WarpSize; +const int MaxThreads = 1024; + +int main(int argc, char *argv[]) { + int check1[MaxThreads]; + int check2[MaxThreads]; + int check3[MaxThreads]; + for (int i = 0; i < MaxThreads; i++) { + check1[i] = check2[i] = check3[i] = 0; + } + + int threadLimit = -1; + + #pragma omp target teams num_teams(1) thread_limit(ThreadLimit) \ + map(check1[:], check2[:], check3[:], threadLimit) + { + threadLimit = omp_get_thread_limit(); + + // All parallel regions should get as many threads as specified by the + // thread_limit() clause. + #pragma omp parallel + { + check1[omp_get_thread_num()] += omp_get_num_threads(); + } + + omp_set_num_threads(NumThreads2); + #pragma omp parallel + { + check2[omp_get_thread_num()] += omp_get_num_threads(); + } + + #pragma omp parallel num_threads(NumThreads3) + { + check3[omp_get_thread_num()] += omp_get_num_threads(); + } + } + + // CHECK: threadLimit = 32 + printf("threadLimit = %d\n", threadLimit); + + // CHECK-NOT: invalid + for (int i = 0; i < MaxThreads; i++) { + if (i < ThreadLimit) { + if (check1[i] != ThreadLimit) { + printf("invalid: check1[%d] should be %d, is %d\n", i, ThreadLimit, check1[i]); + } + } else if (check1[i] != 0) { + printf("invalid: check1[%d] should be 0, is %d\n", i, check1[i]); + } + + if (i < ThreadLimit) { + if (check2[i] != ThreadLimit) { + printf("invalid: check2[%d] should be %d, is %d\n", i, ThreadLimit, check2[i]); + } + } else if (check2[i] != 0) { + printf("invalid: check2[%d] should be 0, is %d\n", i, check2[i]); + } + + if (i < ThreadLimit) { + if (check3[i] != ThreadLimit) { + printf("invalid: check3[%d] should be %d, is %d\n", i, ThreadLimit, check3[i]); + } + } else if (check3[i] != 0) { + printf("invalid: check3[%d] should be 0, is %d\n", i, check3[i]); + } + } + + return 0; +} diff --git a/final/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c b/final/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c new file mode 100644 index 0000000..b3f8768 --- /dev/null +++ b/final/libomptarget/deviceRTLs/nvptx/test/parallel/tripcount.c @@ -0,0 +1,22 @@ +// RUN: %compile-run-and-check + +#include <omp.h> +#include <stdio.h> + +int main() { + int res = 0; + +#pragma omp parallel num_threads(2) reduction(+:res) + { + int tid = omp_get_thread_num(); +#pragma omp target teams distribute reduction(+:res) + for (int i = tid; i < 2; i++) + ++res; + } + // The first thread makes 2 iterations, the second - 1. Expected result of the + // reduction res is 3. + + // CHECK: res = 3. + printf("res = %d.\n", res); + return 0; +} |