diff options
author | Hans Wennborg <hans@hanshq.net> | 2019-02-27 18:49:24 +0000 |
---|---|---|
committer | Hans Wennborg <hans@hanshq.net> | 2019-02-27 18:49:24 +0000 |
commit | 78e6ab58a5a641e07f6d8dab15bdb16210f3afba (patch) | |
tree | 93c484acf8bc75deaf25f89921d757a15337cba4 /rc3/libomptarget/deviceRTLs/nvptx/src/libcall.cu | |
parent | d94992145eaea998bb8901485373e60cb079232c (diff) |
Creating release candidate rc3 from release_800 branch
git-svn-id: https://llvm.org/svn/llvm-project/openmp/tags/RELEASE_800@355016 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'rc3/libomptarget/deviceRTLs/nvptx/src/libcall.cu')
-rw-r--r-- | rc3/libomptarget/deviceRTLs/nvptx/src/libcall.cu | 515 |
1 files changed, 515 insertions, 0 deletions
diff --git a/rc3/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/rc3/libomptarget/deviceRTLs/nvptx/src/libcall.cu new file mode 100644 index 0000000..63bf6b4 --- /dev/null +++ b/rc3/libomptarget/deviceRTLs/nvptx/src/libcall.cu @@ -0,0 +1,515 @@ +//===------------ libcall.cu - NVPTX OpenMP user calls ----------- CUDA -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.txt for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the OpenMP runtime functions that can be +// invoked by the user in an OpenMP region +// +//===----------------------------------------------------------------------===// + +#include "omptarget-nvptx.h" + +// Timer precision is 1ns +#define TIMER_PRECISION ((double)1E-9) + +EXTERN double omp_get_wtick(void) { + PRINT(LD_IO, "omp_get_wtick() returns %g\n", TIMER_PRECISION); + return TIMER_PRECISION; +} + +EXTERN double omp_get_wtime(void) { + unsigned long long nsecs; + asm("mov.u64 %0, %%globaltimer;" : "=l"(nsecs)); + double rc = (double)nsecs * TIMER_PRECISION; + PRINT(LD_IO, "call omp_get_wtime() returns %g\n", rc); + return rc; +} + +EXTERN void omp_set_num_threads(int num) { + // Ignore it for SPMD mode. + if (isSPMDMode()) + return; + ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Expected initialized runtime."); + PRINT(LD_IO, "call omp_set_num_threads(num %d)\n", num); + if (num <= 0) { + WARNING0(LW_INPUT, "expected positive num; ignore\n"); + } else { + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(/*isSPMDExecutionMode=*/false); + currTaskDescr->NThreads() = num; + } +} + +EXTERN int omp_get_num_threads(void) { + bool isSPMDExecutionMode = isSPMDMode(); + int tid = GetLogicalThreadIdInBlock(isSPMDExecutionMode); + int rc = + GetNumberOfOmpThreads(tid, isSPMDExecutionMode, isRuntimeUninitialized()); + PRINT(LD_IO, "call omp_get_num_threads() return %d\n", rc); + return rc; +} + +EXTERN int omp_get_max_threads(void) { + if (isRuntimeUninitialized()) { + ASSERT0(LT_FUSSY, isSPMDMode(), + "Expected SPMD mode only with uninitialized runtime."); + // We're already in parallel region. + return 1; // default is 1 thread avail + } + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); + int rc = 1; // default is 1 thread avail + if (!currTaskDescr->InParallelRegion()) { + // Not currently in a parallel region, return what was set. + rc = currTaskDescr->NThreads(); + ASSERT0(LT_FUSSY, rc >= 0, "bad number of threads"); + } + PRINT(LD_IO, "call omp_get_max_threads() return %d\n", rc); + return rc; +} + +EXTERN int omp_get_thread_limit(void) { + if (isRuntimeUninitialized()) { + ASSERT0(LT_FUSSY, isSPMDMode(), + "Expected SPMD mode only with uninitialized runtime."); + return 0; // default is 0 + } + // per contention group.. meaning threads in current team + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); + int rc = currTaskDescr->ThreadLimit(); + PRINT(LD_IO, "call omp_get_thread_limit() return %d\n", rc); + return rc; +} + +EXTERN int omp_get_thread_num() { + bool isSPMDExecutionMode = isSPMDMode(); + int tid = GetLogicalThreadIdInBlock(isSPMDExecutionMode); + int rc = GetOmpThreadId(tid, isSPMDExecutionMode, isRuntimeUninitialized()); + PRINT(LD_IO, "call omp_get_thread_num() returns %d\n", rc); + return rc; +} + +EXTERN int omp_get_num_procs(void) { + int rc = GetNumberOfProcsInDevice(isSPMDMode()); + PRINT(LD_IO, "call omp_get_num_procs() returns %d\n", rc); + return rc; +} + +EXTERN int omp_in_parallel(void) { + int rc = 0; + if (isRuntimeUninitialized()) { + ASSERT0(LT_FUSSY, isSPMDMode(), + "Expected SPMD mode only with uninitialized runtime."); + rc = 1; // SPMD mode is always in parallel. + } else { + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); + if (currTaskDescr->InParallelRegion()) { + rc = 1; + } + } + PRINT(LD_IO, "call omp_in_parallel() returns %d\n", rc); + return rc; +} + +EXTERN int omp_in_final(void) { + // treat all tasks as final... Specs may expect runtime to keep + // track more precisely if a task was actively set by users... This + // is not explicitely specified; will treat as if runtime can + // actively decide to put a non-final task into a final one. + int rc = 1; + PRINT(LD_IO, "call omp_in_final() returns %d\n", rc); + return rc; +} + +EXTERN void omp_set_dynamic(int flag) { + PRINT(LD_IO, "call omp_set_dynamic(%d) is ignored (no support)\n", flag); +} + +EXTERN int omp_get_dynamic(void) { + int rc = 0; + PRINT(LD_IO, "call omp_get_dynamic() returns %d\n", rc); + return rc; +} + +EXTERN void omp_set_nested(int flag) { + PRINT(LD_IO, "call omp_set_nested(%d) is ignored (no nested support)\n", + flag); +} + +EXTERN int omp_get_nested(void) { + int rc = 0; + PRINT(LD_IO, "call omp_get_nested() returns %d\n", rc); + return rc; +} + +EXTERN void omp_set_max_active_levels(int level) { + PRINT(LD_IO, + "call omp_set_max_active_levels(%d) is ignored (no nested support)\n", + level); +} + +EXTERN int omp_get_max_active_levels(void) { + int rc = 1; + PRINT(LD_IO, "call omp_get_max_active_levels() returns %d\n", rc); + return rc; +} + +EXTERN int omp_get_level(void) { + if (isRuntimeUninitialized()) { + ASSERT0(LT_FUSSY, isSPMDMode(), + "Expected SPMD mode only with uninitialized runtime."); + return parallelLevel; + } + int level = 0; + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); + ASSERT0(LT_FUSSY, currTaskDescr, + "do not expect fct to be called in a non-active thread"); + do { + if (currTaskDescr->IsParallelConstruct()) { + level++; + } + currTaskDescr = currTaskDescr->GetPrevTaskDescr(); + } while (currTaskDescr); + PRINT(LD_IO, "call omp_get_level() returns %d\n", level); + return level; +} + +EXTERN int omp_get_active_level(void) { + if (isRuntimeUninitialized()) { + ASSERT0(LT_FUSSY, isSPMDMode(), + "Expected SPMD mode only with uninitialized runtime."); + return 1; + } + int level = 0; // no active level parallelism + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); + ASSERT0(LT_FUSSY, currTaskDescr, + "do not expect fct to be called in a non-active thread"); + do { + if (currTaskDescr->ThreadsInTeam() > 1) { + // has a parallel with more than one thread in team + level = 1; + break; + } + currTaskDescr = currTaskDescr->GetPrevTaskDescr(); + } while (currTaskDescr); + PRINT(LD_IO, "call omp_get_active_level() returns %d\n", level) + return level; +} + +EXTERN int omp_get_ancestor_thread_num(int level) { + if (isRuntimeUninitialized()) { + ASSERT0(LT_FUSSY, isSPMDMode(), + "Expected SPMD mode only with uninitialized runtime."); + return level == 1 ? GetThreadIdInBlock() : 0; + } + int rc = -1; + if (level == 0) { + rc = 0; + } else if (level > 0) { + int totLevel = omp_get_level(); + if (level <= totLevel) { + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); + int steps = totLevel - level; + PRINT(LD_IO, "backtrack %d steps\n", steps); + ASSERT0(LT_FUSSY, currTaskDescr, + "do not expect fct to be called in a non-active thread"); + do { + if (DON(LD_IOD)) { + // print current state + omp_sched_t sched = currTaskDescr->GetRuntimeSched(); + PRINT(LD_ALL, + "task descr %s %d: %s, in par %d, rt sched %d," + " chunk %" PRIu64 "; tid %d, tnum %d, nthreads %d\n", + "ancestor", steps, + (currTaskDescr->IsParallelConstruct() ? "par" : "task"), + (int)currTaskDescr->InParallelRegion(), (int)sched, + currTaskDescr->RuntimeChunkSize(), + (int)currTaskDescr->ThreadId(), + (int)currTaskDescr->ThreadsInTeam(), + (int)currTaskDescr->NThreads()); + } + + if (currTaskDescr->IsParallelConstruct()) { + // found the level + if (!steps) { + rc = currTaskDescr->ThreadId(); + break; + } + steps--; + } + currTaskDescr = currTaskDescr->GetPrevTaskDescr(); + } while (currTaskDescr); + ASSERT0(LT_FUSSY, !steps, "expected to find all steps"); + } + } + PRINT(LD_IO, "call omp_get_ancestor_thread_num(level %d) returns %d\n", level, + rc) + return rc; +} + +EXTERN int omp_get_team_size(int level) { + if (isRuntimeUninitialized()) { + ASSERT0(LT_FUSSY, isSPMDMode(), + "Expected SPMD mode only with uninitialized runtime."); + return level == 1 ? GetNumberOfThreadsInBlock() : 1; + } + int rc = -1; + if (level == 0) { + rc = 1; + } else if (level > 0) { + int totLevel = omp_get_level(); + if (level <= totLevel) { + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); + int steps = totLevel - level; + ASSERT0(LT_FUSSY, currTaskDescr, + "do not expect fct to be called in a non-active thread"); + do { + if (currTaskDescr->IsParallelConstruct()) { + if (!steps) { + // found the level + rc = currTaskDescr->ThreadsInTeam(); + break; + } + steps--; + } + currTaskDescr = currTaskDescr->GetPrevTaskDescr(); + } while (currTaskDescr); + ASSERT0(LT_FUSSY, !steps, "expected to find all steps"); + } + } + PRINT(LD_IO, "call omp_get_team_size(level %d) returns %d\n", level, rc) + return rc; +} + +EXTERN void omp_get_schedule(omp_sched_t *kind, int *modifier) { + if (isRuntimeUninitialized()) { + ASSERT0(LT_FUSSY, isSPMDMode(), + "Expected SPMD mode only with uninitialized runtime."); + *kind = omp_sched_static; + *modifier = 1; + } else { + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); + *kind = currTaskDescr->GetRuntimeSched(); + *modifier = currTaskDescr->RuntimeChunkSize(); + } + PRINT(LD_IO, "call omp_get_schedule returns sched %d and modif %d\n", + (int)*kind, *modifier); +} + +EXTERN void omp_set_schedule(omp_sched_t kind, int modifier) { + PRINT(LD_IO, "call omp_set_schedule(sched %d, modif %d)\n", (int)kind, + modifier); + if (isRuntimeUninitialized()) { + ASSERT0(LT_FUSSY, isSPMDMode(), + "Expected SPMD mode only with uninitialized runtime."); + return; + } + if (kind >= omp_sched_static && kind < omp_sched_auto) { + omptarget_nvptx_TaskDescr *currTaskDescr = + getMyTopTaskDescriptor(isSPMDMode()); + currTaskDescr->SetRuntimeSched(kind); + currTaskDescr->RuntimeChunkSize() = modifier; + PRINT(LD_IOD, "omp_set_schedule did set sched %d & modif %" PRIu64 "\n", + (int)currTaskDescr->GetRuntimeSched(), + currTaskDescr->RuntimeChunkSize()); + } +} + +EXTERN omp_proc_bind_t omp_get_proc_bind(void) { + PRINT0(LD_IO, "call omp_get_proc_bin() is true, regardless on state\n"); + return omp_proc_bind_true; +} + +EXTERN int omp_get_num_places(void) { + PRINT0(LD_IO, "call omp_get_num_places() returns 0\n"); + return 0; +} + +EXTERN int omp_get_place_num_procs(int place_num) { + PRINT0(LD_IO, "call omp_get_place_num_procs() returns 0\n"); + return 0; +} + +EXTERN void omp_get_place_proc_ids(int place_num, int *ids) { + PRINT0(LD_IO, "call to omp_get_place_proc_ids()\n"); +} + +EXTERN int omp_get_place_num(void) { + PRINT0(LD_IO, "call to omp_get_place_num() returns 0\n"); + return 0; +} + +EXTERN int omp_get_partition_num_places(void) { + PRINT0(LD_IO, "call to omp_get_partition_num_places() returns 0\n"); + return 0; +} + +EXTERN void omp_get_partition_place_nums(int *place_nums) { + PRINT0(LD_IO, "call to omp_get_partition_place_nums()\n"); +} + +EXTERN int omp_get_cancellation(void) { + int rc = FALSE; // currently false only + PRINT(LD_IO, "call omp_get_cancellation() returns %d\n", rc); + return rc; +} + +EXTERN void omp_set_default_device(int deviceId) { + PRINT0(LD_IO, "call omp_get_default_device() is undef on device\n"); +} + +EXTERN int omp_get_default_device(void) { + PRINT0(LD_IO, + "call omp_get_default_device() is undef on device, returns 0\n"); + return 0; +} + +EXTERN int omp_get_num_devices(void) { + PRINT0(LD_IO, "call omp_get_num_devices() is undef on device, returns 0\n"); + return 0; +} + +EXTERN int omp_get_num_teams(void) { + int rc = GetNumberOfOmpTeams(); + PRINT(LD_IO, "call omp_get_num_teams() returns %d\n", rc); + return rc; +} + +EXTERN int omp_get_team_num() { + int rc = GetOmpTeamId(); + PRINT(LD_IO, "call omp_get_team_num() returns %d\n", rc); + return rc; +} + +EXTERN int omp_is_initial_device(void) { + PRINT0(LD_IO, "call omp_is_initial_device() returns 0\n"); + return 0; // 0 by def on device +} + +// Unspecified on the device. +EXTERN int omp_get_initial_device(void) { + PRINT0(LD_IO, "call omp_get_initial_device() returns 0\n"); + return 0; +} + +// Unused for now. +EXTERN int omp_get_max_task_priority(void) { + PRINT0(LD_IO, "call omp_get_max_task_priority() returns 0\n"); + return 0; +} + +//////////////////////////////////////////////////////////////////////////////// +// locks +//////////////////////////////////////////////////////////////////////////////// + +#define __OMP_SPIN 1000 +#define UNSET 0 +#define SET 1 + +EXTERN void omp_init_lock(omp_lock_t *lock) { + omp_unset_lock(lock); + PRINT0(LD_IO, "call omp_init_lock()\n"); +} + +EXTERN void omp_destroy_lock(omp_lock_t *lock) { + omp_unset_lock(lock); + PRINT0(LD_IO, "call omp_destroy_lock()\n"); +} + +EXTERN void omp_set_lock(omp_lock_t *lock) { + // int atomicCAS(int* address, int compare, int val); + // (old == compare ? val : old) + + // TODO: not sure spinning is a good idea here.. + while (atomicCAS(lock, UNSET, SET) != UNSET) { + clock_t start = clock(); + clock_t now; + for (;;) { + now = clock(); + clock_t cycles = now > start ? now - start : now + (0xffffffff - start); + if (cycles >= __OMP_SPIN * blockIdx.x) { + break; + } + } + } // wait for 0 to be the read value + + PRINT0(LD_IO, "call omp_set_lock()\n"); +} + +EXTERN void omp_unset_lock(omp_lock_t *lock) { + (void)atomicExch(lock, UNSET); + + PRINT0(LD_IO, "call omp_unset_lock()\n"); +} + +EXTERN int omp_test_lock(omp_lock_t *lock) { + // int atomicCAS(int* address, int compare, int val); + // (old == compare ? val : old) + int ret = atomicAdd(lock, 0); + + PRINT(LD_IO, "call omp_test_lock() return %d\n", ret); + + return ret; +} + +// for xlf Fotran +// Fotran, the return is LOGICAL type + +#define FLOGICAL long +EXTERN FLOGICAL __xlf_omp_is_initial_device_i8() { + int ret = omp_is_initial_device(); + if (ret == 0) + return (FLOGICAL)0; + else + return (FLOGICAL)1; +} + +EXTERN int __xlf_omp_is_initial_device_i4() { + int ret = omp_is_initial_device(); + if (ret == 0) + return 0; + else + return 1; +} + +EXTERN long __xlf_omp_get_team_num_i4() { + int ret = omp_get_team_num(); + return (long)ret; +} + +EXTERN long __xlf_omp_get_num_teams_i4() { + int ret = omp_get_num_teams(); + return (long)ret; +} + +EXTERN void xlf_debug_print_int(int *p) { + printf("xlf DEBUG %d): %p %d\n", omp_get_team_num(), p, p == 0 ? 0 : *p); +} + +EXTERN void xlf_debug_print_long(long *p) { + printf("xlf DEBUG %d): %p %ld\n", omp_get_team_num(), p, p == 0 ? 0 : *p); +} + +EXTERN void xlf_debug_print_float(float *p) { + printf("xlf DEBUG %d): %p %f\n", omp_get_team_num(), p, p == 0 ? 0 : *p); +} + +EXTERN void xlf_debug_print_double(double *p) { + printf("xlf DEBUG %d): %p %f\n", omp_get_team_num(), p, p == 0 ? 0 : *p); +} + +EXTERN void xlf_debug_print_addr(void *p) { + printf("xlf DEBUG %d): %p \n", omp_get_team_num(), p); +} |