aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jon Chesterfield <jonathanchesterfield@gmail.com> 2019-10-04 22:30:28 +0000
committer Jon Chesterfield <jonathanchesterfield@gmail.com> 2019-10-04 22:30:28 +0000
commit 731769fe89c8e17850bc9df82d2ed248632a59ad (patch)
tree 928beb2e1f028b9bbcdc347c79baa2aa99182fea
parent 3bdcaec89abb1affe3113894bd96e64f275395c4 (diff)
download openmp-731769fe89c8e17850bc9df82d2ed248632a59ad.tar.gz
[libomptarget][nfc] Update remaining uint32 to use lanemask_t
Summary: [libomptarget][nfc] Update remaining uint32 to use lanemask_t Update a few functions in the API to use lanemask_t instead of i32. NFC for nvptx. Also update the ActiveThreads type in DataSharingStateTy. This removes a lot of #ifdef from the downstream amdgcn implementation. Reviewers: ABataev, jdoerfert, grokos, ronlieb, RaviNarayanaswamy Subscribers: openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D68513 git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@373806 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/data_sharing.cu 8
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/interface.h 16
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/loop.cu 3
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h 2
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/parallel.cu 14
-rw-r--r-- libomptarget/deviceRTLs/nvptx/src/sync.cu 4
6 files changed, 27 insertions, 20 deletions
diff --git a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index bd4cfec..5e936b0 100644
--- a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -96,7 +96,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS,
EXTERN void *__kmpc_data_sharing_environment_begin(
__kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
- void **SavedSharedFrame, int32_t *SavedActiveThreads,
+ void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
size_t SharingDataSize, size_t SharingDefaultDataSize,
int16_t IsOMPRuntimeInitialized) {
@@ -117,7 +117,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
void *&StackP = DataSharingState.StackPtr[WID];
void * volatile &FrameP = DataSharingState.FramePtr[WID];
- int32_t &ActiveT = DataSharingState.ActiveThreads[WID];
+ __kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];
DSPRINT0(DSFLAG, "Save current slot/stack values.\n");
// Save the current values.
@@ -225,7 +225,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
EXTERN void __kmpc_data_sharing_environment_end(
__kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
- void **SavedSharedFrame, int32_t *SavedActiveThreads,
+ void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
int32_t IsEntryPoint) {
DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_end\n");
@@ -260,7 +260,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
// assume that threads will converge right after the call site that started
// the environment.
if (IsWarpMasterActiveThread()) {
- int32_t &ActiveT = DataSharingState.ActiveThreads[WID];
+ __kmpc_impl_lanemask_t &ActiveT = DataSharingState.ActiveThreads[WID];
DSPRINT0(DSFLAG, "Before restoring the stack\n");
// Zero the bits in the mask. If it is still different from zero, then we
diff --git a/libomptarget/deviceRTLs/nvptx/src/interface.h b/libomptarget/deviceRTLs/nvptx/src/interface.h
index ab57715..4a84922 100644
--- a/libomptarget/deviceRTLs/nvptx/src/interface.h
+++ b/libomptarget/deviceRTLs/nvptx/src/interface.h
@@ -19,6 +19,7 @@
#define _INTERFACES_H_
#include "option.h"
+#include "target_impl.h"
////////////////////////////////////////////////////////////////////////////////
// OpenMP interface
@@ -422,9 +423,9 @@ EXTERN void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
EXTERN void __kmpc_flush(kmp_Ident *loc);
// vote
-EXTERN int32_t __kmpc_warp_active_thread_mask();
+EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask();
// syncwarp
-EXTERN void __kmpc_syncwarp(int32_t);
+EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t);
// tasks
EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc,
@@ -475,11 +476,13 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
int16_t IsOMPRuntimeInitialized);
EXTERN void __kmpc_kernel_end_parallel();
-EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask,
+EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer,
+ __kmpc_impl_lanemask_t Mask,
bool *IsFinal,
int32_t *LaneSource);
EXTERN void __kmpc_kernel_end_convergent_parallel(void *buffer);
-EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask,
+EXTERN bool __kmpc_kernel_convergent_simd(void *buffer,
+ __kmpc_impl_lanemask_t Mask,
bool *IsFinal, int32_t *LaneSource,
int32_t *LaneId, int32_t *NumLanes);
EXTERN void __kmpc_kernel_end_convergent_simd(void *buffer);
@@ -510,12 +513,13 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *RootS,
size_t InitialDataSize);
EXTERN void *__kmpc_data_sharing_environment_begin(
__kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
- void **SavedSharedFrame, int32_t *SavedActiveThreads,
+ void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
size_t SharingDataSize, size_t SharingDefaultDataSize,
int16_t IsOMPRuntimeInitialized);
EXTERN void __kmpc_data_sharing_environment_end(
__kmpc_data_sharing_slot **SavedSharedSlot, void **SavedSharedStack,
- void **SavedSharedFrame, int32_t *SavedActiveThreads, int32_t IsEntryPoint);
+ void **SavedSharedFrame, __kmpc_impl_lanemask_t *SavedActiveThreads,
+ int32_t IsEntryPoint);
EXTERN void *
__kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID,
diff --git a/libomptarget/deviceRTLs/nvptx/src/loop.cu b/libomptarget/deviceRTLs/nvptx/src/loop.cu
index f9a3015..ee37846 100644
--- a/libomptarget/deviceRTLs/nvptx/src/loop.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/loop.cu
@@ -380,7 +380,8 @@ public:
////////////////////////////////////////////////////////////////////////////////
// Support for dispatch next
- INLINE static int64_t Shuffle(unsigned active, int64_t val, int leader) {
+ INLINE static uint64_t Shuffle(__kmpc_impl_lanemask_t active, int64_t val,
+ int leader) {
uint32_t lo, hi;
__kmpc_impl_unpack(val, lo, hi);
hi = __kmpc_impl_shfl_sync(active, hi, leader);
diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 2299d24..70e6c28 100644
--- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -107,7 +107,7 @@ struct DataSharingStateTy {
__kmpc_data_sharing_slot *SlotPtr[DS_Max_Warp_Number];
void *StackPtr[DS_Max_Warp_Number];
void * volatile FramePtr[DS_Max_Warp_Number];
- int32_t ActiveThreads[DS_Max_Warp_Number];
+ __kmpc_impl_lanemask_t ActiveThreads[DS_Max_Warp_Number];
};
// Additional worker slot type which is initialized with the default worker slot
// size of 4*32 bytes.
diff --git a/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/libomptarget/deviceRTLs/nvptx/src/parallel.cu
index 24a235d..016ded8 100644
--- a/libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -44,13 +44,14 @@ typedef struct ConvergentSimdJob {
////////////////////////////////////////////////////////////////////////////////
// support for convergent simd (team of threads in a warp only)
////////////////////////////////////////////////////////////////////////////////
-EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask,
+EXTERN bool __kmpc_kernel_convergent_simd(void *buffer,
+ __kmpc_impl_lanemask_t Mask,
bool *IsFinal, int32_t *LaneSource,
int32_t *LaneId, int32_t *NumLanes) {
PRINT0(LD_IO, "call to __kmpc_kernel_convergent_simd\n");
- uint32_t ConvergentMask = Mask;
+ __kmpc_impl_lanemask_t ConvergentMask = Mask;
int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask);
- uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
+ __kmpc_impl_lanemask_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
*LaneSource += __kmpc_impl_ffs(WorkRemaining);
*IsFinal = __kmpc_impl_popc(WorkRemaining) == 1;
__kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt();
@@ -117,13 +118,14 @@ typedef struct ConvergentParallelJob {
////////////////////////////////////////////////////////////////////////////////
// support for convergent parallelism (team of threads in a warp only)
////////////////////////////////////////////////////////////////////////////////
-EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask,
+EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer,
+ __kmpc_impl_lanemask_t Mask,
bool *IsFinal,
int32_t *LaneSource) {
PRINT0(LD_IO, "call to __kmpc_kernel_convergent_parallel\n");
- uint32_t ConvergentMask = Mask;
+ __kmpc_impl_lanemask_t ConvergentMask = Mask;
int32_t ConvergentSize = __kmpc_impl_popc(ConvergentMask);
- uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
+ __kmpc_impl_lanemask_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
*LaneSource += __kmpc_impl_ffs(WorkRemaining);
*IsFinal = __kmpc_impl_popc(WorkRemaining) == 1;
__kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt();
diff --git a/libomptarget/deviceRTLs/nvptx/src/sync.cu b/libomptarget/deviceRTLs/nvptx/src/sync.cu
index 343293e..28a5419 100644
--- a/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -140,7 +140,7 @@ EXTERN void __kmpc_flush(kmp_Ident *loc) {
// Vote
////////////////////////////////////////////////////////////////////////////////
-EXTERN int32_t __kmpc_warp_active_thread_mask() {
+EXTERN __kmpc_impl_lanemask_t __kmpc_warp_active_thread_mask() {
PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");
return __kmpc_impl_activemask();
}
@@ -149,7 +149,7 @@ EXTERN int32_t __kmpc_warp_active_thread_mask() {
// Syncwarp
////////////////////////////////////////////////////////////////////////////////
-EXTERN void __kmpc_syncwarp(int32_t Mask) {
+EXTERN void __kmpc_syncwarp(__kmpc_impl_lanemask_t Mask) {
PRINT0(LD_IO, "call __kmpc_syncwarp\n");
__kmpc_impl_syncwarp(Mask);
}