about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jon Chesterfield <jonathanchesterfield@gmail.com> 2019-10-04 21:39:22 +0000
committer Jon Chesterfield <jonathanchesterfield@gmail.com> 2019-10-04 21:39:22 +0000
commit 3bdcaec89abb1affe3113894bd96e64f275395c4 (patch)
tree fa8b7770ed7b217daa32f7ae60b60048ec26cf4e
parent 85e24591e4ebdbae12cf85f55ade8c80653219cb (diff)
download openmp-3bdcaec89abb1affe3113894bd96e64f275395c4.tar.gz
Use named constant to indicate all lanes, to handle 32 and 64 wide architectures
Summary: Use named constant to indicate all lanes, to handle 32 and 64 wide architectures
Reviewers: ABataev, jdoerfert, grokos, ronlieb
Reviewed By: grokos
Subscribers: ronlieb, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D68369
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@373793 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/parallel.cu 4
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/reduction.cu 12
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/target_impl.h 2
3 files changed, 10 insertions, 8 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/libomptarget/deviceRTLs/nvptx/src/parallel.cu
index 5db443c..24a235d 100644
--- a/libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -320,7 +320,7 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
// can be changed incorrectly because of threads divergence.
bool IsActiveParallelRegion = threadsInTeam != 1;
IncParallelLevel(IsActiveParallelRegion,
- IsActiveParallelRegion ? 0xFFFFFFFF : 1u);
+ IsActiveParallelRegion ? __kmpc_impl_all_lanes : 1u);
}
return isActive;
@@ -347,7 +347,7 @@ EXTERN void __kmpc_kernel_end_parallel() {
// be changed incorrectly because of threads divergence.
bool IsActiveParallelRegion = threadsInTeam != 1;
DecParallelLevel(IsActiveParallelRegion,
- IsActiveParallelRegion ? 0xFFFFFFFF : 1u);
+ IsActiveParallelRegion ? __kmpc_impl_all_lanes : 1u);
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/libomptarget/deviceRTLs/nvptx/src/reduction.cu
index 347c556..cee3e5d 100644
--- a/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -24,14 +24,14 @@ EXTERN
void __kmpc_nvptx_end_reduce_nowait(int32_t global_tid) {}
EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) {
- return __kmpc_impl_shfl_down_sync(0xFFFFFFFF, val, delta, size);
+ return __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, val, delta, size);
}
EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) {
uint32_t lo, hi;
__kmpc_impl_unpack(val, lo, hi);
- hi = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, hi, delta, size);
- lo = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, lo, delta, size);
+ hi = __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, hi, delta, size);
+ lo = __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, lo, delta, size);
return __kmpc_impl_pack(lo, hi);
}
@@ -82,7 +82,7 @@ int32_t __kmpc_nvptx_simd_reduce_nowait(int32_t global_tid, int32_t num_vars,
kmp_ShuffleReductFctPtr shflFct,
kmp_InterWarpCopyFctPtr cpyFct) {
__kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
- if (Liveness == 0xffffffff) {
+ if (Liveness == __kmpc_impl_all_lanes) {
gpu_regular_warp_reduce(reduce_data, shflFct);
return GetThreadIdInBlock() % WARPSIZE ==
0; // Result on lane 0 of the simd warp.
@@ -143,7 +143,7 @@ static int32_t nvptx_parallel_reduce_nowait(
return BlockThreadId == 0;
#else
__kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
- if (Liveness == 0xffffffff) // Full warp
+ if (Liveness == __kmpc_impl_all_lanes) // Full warp
gpu_regular_warp_reduce(reduce_data, shflFct);
else if (!(Liveness & (Liveness + 1))) // Partial warp but contiguous lanes
gpu_irregular_warp_reduce(reduce_data, shflFct,
@@ -318,7 +318,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
// Reduce across warps to the warp master.
__kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
- if (Liveness == 0xffffffff) // Full warp
+ if (Liveness == __kmpc_impl_all_lanes) // Full warp
gpu_regular_warp_reduce(reduce_data, shflFct);
else // Partial warp but contiguous lanes
gpu_irregular_warp_reduce(reduce_data, shflFct,
diff --git a/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 8008143..37a125d 100644
--- a/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -27,6 +27,8 @@ INLINE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) {
}
typedef uint32_t __kmpc_impl_lanemask_t;
+static const __kmpc_impl_lanemask_t __kmpc_impl_all_lanes =
+ UINT32_C(0xffffffff);
INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
__kmpc_impl_lanemask_t res;