diff options
author | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2019-10-04 21:39:22 +0000 |
---|---|---|
committer | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2019-10-04 21:39:22 +0000 |
commit | 3bdcaec89abb1affe3113894bd96e64f275395c4 (patch) | |
tree | fa8b7770ed7b217daa32f7ae60b60048ec26cf4e /libomptarget | |
parent | 85e24591e4ebdbae12cf85f55ade8c80653219cb (diff) |
Use named constant to indicate all lanes, to handle 32 and 64 wide architectures
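Summary: Replace the hard-coded 0xFFFFFFFF lane masks with the named constant __kmpc_impl_all_lanes (defined in the per-target target_impl.h), so that architectures with 32- or 64-lane-wide warps can each supply the appropriate all-lanes mask.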
Reviewers: ABataev, jdoerfert, grokos, ronlieb
Reviewed By: grokos
Subscribers: ronlieb, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D68369
git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@373793 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'libomptarget')
-rw-r--r-- | libomptarget/deviceRTLs/nvptx/src/parallel.cu | 4 | ||||
-rw-r--r-- | libomptarget/deviceRTLs/nvptx/src/reduction.cu | 12 | ||||
-rw-r--r-- | libomptarget/deviceRTLs/nvptx/src/target_impl.h | 2 |
3 files changed, 10 insertions, 8 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/libomptarget/deviceRTLs/nvptx/src/parallel.cu index 5db443c..24a235d 100644 --- a/libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ b/libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -320,7 +320,7 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn, // can be changed incorrectly because of threads divergence. bool IsActiveParallelRegion = threadsInTeam != 1; IncParallelLevel(IsActiveParallelRegion, - IsActiveParallelRegion ? 0xFFFFFFFF : 1u); + IsActiveParallelRegion ? __kmpc_impl_all_lanes : 1u); } return isActive; @@ -347,7 +347,7 @@ EXTERN void __kmpc_kernel_end_parallel() { // be changed incorrectly because of threads divergence. bool IsActiveParallelRegion = threadsInTeam != 1; DecParallelLevel(IsActiveParallelRegion, - IsActiveParallelRegion ? 0xFFFFFFFF : 1u); + IsActiveParallelRegion ? __kmpc_impl_all_lanes : 1u); } //////////////////////////////////////////////////////////////////////////////// diff --git a/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/libomptarget/deviceRTLs/nvptx/src/reduction.cu index 347c556..cee3e5d 100644 --- a/libomptarget/deviceRTLs/nvptx/src/reduction.cu +++ b/libomptarget/deviceRTLs/nvptx/src/reduction.cu @@ -24,14 +24,14 @@ EXTERN void __kmpc_nvptx_end_reduce_nowait(int32_t global_tid) {} EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) { - return __kmpc_impl_shfl_down_sync(0xFFFFFFFF, val, delta, size); + return __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, val, delta, size); } EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) { uint32_t lo, hi; __kmpc_impl_unpack(val, lo, hi); - hi = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, hi, delta, size); - lo = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, lo, delta, size); + hi = __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, hi, delta, size); + lo = __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, lo, delta, size); return __kmpc_impl_pack(lo, hi); } @@ -82,7 +82,7 @@ 
int32_t __kmpc_nvptx_simd_reduce_nowait(int32_t global_tid, int32_t num_vars, kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct) { __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask(); - if (Liveness == 0xffffffff) { + if (Liveness == __kmpc_impl_all_lanes) { gpu_regular_warp_reduce(reduce_data, shflFct); return GetThreadIdInBlock() % WARPSIZE == 0; // Result on lane 0 of the simd warp. @@ -143,7 +143,7 @@ static int32_t nvptx_parallel_reduce_nowait( return BlockThreadId == 0; #else __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask(); - if (Liveness == 0xffffffff) // Full warp + if (Liveness == __kmpc_impl_all_lanes) // Full warp gpu_regular_warp_reduce(reduce_data, shflFct); else if (!(Liveness & (Liveness + 1))) // Partial warp but contiguous lanes gpu_irregular_warp_reduce(reduce_data, shflFct, @@ -318,7 +318,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars, // Reduce across warps to the warp master. __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask(); - if (Liveness == 0xffffffff) // Full warp + if (Liveness == __kmpc_impl_all_lanes) // Full warp gpu_regular_warp_reduce(reduce_data, shflFct); else // Partial warp but contiguous lanes gpu_irregular_warp_reduce(reduce_data, shflFct, diff --git a/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/libomptarget/deviceRTLs/nvptx/src/target_impl.h index 8008143..37a125d 100644 --- a/libomptarget/deviceRTLs/nvptx/src/target_impl.h +++ b/libomptarget/deviceRTLs/nvptx/src/target_impl.h @@ -27,6 +27,8 @@ INLINE uint64_t __kmpc_impl_pack(uint32_t lo, uint32_t hi) { } typedef uint32_t __kmpc_impl_lanemask_t; +static const __kmpc_impl_lanemask_t __kmpc_impl_all_lanes = + UINT32_C(0xffffffff); INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() { __kmpc_impl_lanemask_t res; |