aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2019-10-16 16:59:01 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2019-10-16 16:59:01 +0000
commit88700afba7f6176b9c437a6b66d81fd484ad845d (patch)
tree80446d2aca550e761683d3c3f5a8161ceb5e0789
parent5cd7a6f916b7b11f9955587e413af95c3f45bc7b (diff)
[OPENMP]Use different addresses for zeroed thread_id/bound_id.
When the parallel region is called directly in the sequential region, the zeroed tid/bound id are used. But they must point to the different memory locations as the parameters are marked as noalias. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@375017 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/CGOpenMPRuntime.cpp10
-rw-r--r--lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp35
-rw-r--r--test/OpenMP/nvptx_target_codegen.cpp4
-rw-r--r--test/OpenMP/nvptx_teams_reduction_codegen.cpp1
-rw-r--r--test/OpenMP/parallel_if_codegen.cpp15
5 files changed, 40 insertions, 25 deletions
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 42d18b4098..7ff36c79f6 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3075,14 +3075,18 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.EmitRuntimeCall(
RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
- // OutlinedFn(&GTid, &zero, CapturedStruct);
+ // OutlinedFn(&zero, &zero_bound, CapturedStruct);
Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
- /*Name*/ ".zero.addr");
+ /*Name=*/".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ Address ZeroAddrBound =
+ CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".bound.zero.addr");
+ CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
// ThreadId for serialized parallels is 0.
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 63093e7325..291052109e 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -2459,9 +2459,8 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;
- Address ZeroAddr = CGF.CreateMemTemp(
- CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
- /*Name*/ ".zero.addr");
+ Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
@@ -2490,16 +2489,19 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall(
// Force inline this outlined function at its call site.
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
- Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
- /*DestWidth=*/32, /*Signed=*/1),
- ".zero.addr");
+ Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
// ThreadId for serialized parallels is 0.
Address ThreadIDAddr = ZeroAddr;
- auto &&CodeGen = [this, Fn, CapturedVars, Loc, ZeroAddr, &ThreadIDAddr](
+ auto &&CodeGen = [this, Fn, CapturedVars, Loc, &ThreadIDAddr](
CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
+ Address ZeroAddr =
+ CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".bound.zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
@@ -2656,17 +2658,19 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
//
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
- /*DestWidth=*/32, /*Signed=*/1),
- ".zero.addr");
+ Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
// ThreadId for serialized parallels is 0.
Address ThreadIDAddr = ZeroAddr;
- auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
- &ThreadIDAddr](CodeGenFunction &CGF,
- PrePostActionTy &Action) {
+ auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, &ThreadIDAddr](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
+ Address ZeroAddr =
+ CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".bound.zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
@@ -4567,9 +4571,8 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
const auto *RD = CS.getCapturedRecordDecl();
auto CurField = RD->field_begin();
- Address ZeroAddr = CGF.CreateMemTemp(
- CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
- /*Name*/ ".zero.addr");
+ Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
+ /*Name=*/".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
// Get the array of arguments.
SmallVector<llvm::Value *, 8> Args;
diff --git a/test/OpenMP/nvptx_target_codegen.cpp b/test/OpenMP/nvptx_target_codegen.cpp
index 84ff991006..333f9166d0 100644
--- a/test/OpenMP/nvptx_target_codegen.cpp
+++ b/test/OpenMP/nvptx_target_codegen.cpp
@@ -577,6 +577,8 @@ int baz(int f, double &a) {
// CHECK: alloca i32,
// CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32,
// CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
+ // CHECK: [[BND_ZERO_ADDR:%.+]] = alloca i32,
+ // CHECK: store i32 0, i32* [[BND_ZERO_ADDR]]
// CHECK: store i32 0, i32* [[ZERO_ADDR]]
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[UNKNOWN]]
// CHECK: [[PAR_LEVEL:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]])
@@ -609,7 +611,7 @@ int baz(int f, double &a) {
// CHECK: br i1
// CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]])
- // CHECK: call void [[OUTLINED:@.+]](i32* [[ZERO_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
+ // CHECK: call void [[OUTLINED:@.+]](i32* [[ZERO_ADDR]], i32* [[BND_ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}})
// CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]])
// CHECK: br label
diff --git a/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/test/OpenMP/nvptx_teams_reduction_codegen.cpp
index 96f4de8455..d17eee24ab 100644
--- a/test/OpenMP/nvptx_teams_reduction_codegen.cpp
+++ b/test/OpenMP/nvptx_teams_reduction_codegen.cpp
@@ -712,6 +712,7 @@ int bar(int n){
// CHECK-NOT: call void @__kmpc_get_team_static_memory
// CHECK: store i32 0,
+ // CHECK: store i32 0,
// CHECK: store i32 0, i32* [[A_ADDR:%.+]], align
// CHECK: store i16 -32768, i16* [[B_ADDR:%.+]], align
// CHECK: call void [[OUTLINED:@.+]](i32* {{.+}}, i32* {{.+}}, i32* [[A_ADDR]], i16* [[B_ADDR]])
diff --git a/test/OpenMP/parallel_if_codegen.cpp b/test/OpenMP/parallel_if_codegen.cpp
index ec9fc01561..d261b5b21c 100644
--- a/test/OpenMP/parallel_if_codegen.cpp
+++ b/test/OpenMP/parallel_if_codegen.cpp
@@ -29,12 +29,13 @@ void gtid_test() {
}
// CHECK: define internal {{.*}}void [[GTID_TEST_REGION1]](i{{.+}}* noalias [[GTID_PARAM:%.+]], i32* noalias
+// CHECK: store i32 0, i32* [[BND_ZERO_ADDR:%.+]],
// CHECK: store i32 0, i32* [[ZERO_ADDR:%.+]],
// CHECK: store i{{[0-9]+}}* [[GTID_PARAM]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]],
// CHECK: [[GTID_ADDR:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_ADDR]]
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
-// CHECK: call void [[GTID_TEST_REGION2:@.+]](i{{[0-9]+}}* [[ZERO_ADDR]]
+// CHECK: call void [[GTID_TEST_REGION2:@.+]](i{{[0-9]+}}* [[ZERO_ADDR]], i{{[0-9]+}}* [[BND_ZERO_ADDR]]
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i{{.+}} [[GTID]])
// CHECK: ret void
@@ -55,14 +56,16 @@ int tmain(T Arg) {
// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
int main() {
+// CHECK: store i32 0, i32* [[BND_ZERO_ADDR2:%.+]],
// CHECK: store i32 0, i32* [[ZERO_ADDR2:%.+]],
+// CHECK: store i32 0, i32* [[BND_ZERO_ADDR1:%.+]],
// CHECK: store i32 0, i32* [[ZERO_ADDR1:%.+]],
// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN4:@.+]] to void
#pragma omp parallel if (true)
fn4();
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK: call void [[CAP_FN5:@.+]](i32* [[ZERO_ADDR1]], i32* [[ZERO_ADDR1]])
+// CHECK: call void [[CAP_FN5:@.+]](i32* [[ZERO_ADDR1]], i32* [[BND_ZERO_ADDR1]])
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
#pragma omp parallel if (false)
fn5();
@@ -73,7 +76,7 @@ int main() {
// CHECK: br label %[[OMP_END:.+]]
// CHECK: [[OMP_ELSE]]
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK: call void [[CAP_FN6]](i32* [[ZERO_ADDR2]], i32* [[ZERO_ADDR2]])
+// CHECK: call void [[CAP_FN6]](i32* [[ZERO_ADDR2]], i32* [[BND_ZERO_ADDR2]])
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
// CHECK: br label %[[OMP_END]]
// CHECK: [[OMP_END]]
@@ -96,12 +99,14 @@ int main() {
// CHECK: ret void
// CHECK-LABEL: define {{.+}} @{{.+}}tmain
+// CHECK: store i32 0, i32* [[BND_ZERO_ADDR2:%.+]],
// CHECK: store i32 0, i32* [[ZERO_ADDR2:%.+]],
+// CHECK: store i32 0, i32* [[BND_ZERO_ADDR1:%.+]],
// CHECK: store i32 0, i32* [[ZERO_ADDR1:%.+]],
// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN1:@.+]] to void
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK: call void [[CAP_FN2:@.+]](i32* [[ZERO_ADDR1]], i32* [[ZERO_ADDR1]])
+// CHECK: call void [[CAP_FN2:@.+]](i32* [[ZERO_ADDR1]], i32* [[BND_ZERO_ADDR1]])
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
// CHECK: br i1 %{{.+}}, label %[[OMP_THEN:.+]], label %[[OMP_ELSE:.+]]
// CHECK: [[OMP_THEN]]
@@ -109,7 +114,7 @@ int main() {
// CHECK: br label %[[OMP_END:.+]]
// CHECK: [[OMP_ELSE]]
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
-// CHECK: call void [[CAP_FN3]](i32* [[ZERO_ADDR2]], i32* [[ZERO_ADDR2]])
+// CHECK: call void [[CAP_FN3]](i32* [[ZERO_ADDR2]], i32* [[BND_ZERO_ADDR2]])
// CHECK: call {{.*}}void @__kmpc_end_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
// CHECK: br label %[[OMP_END]]
// CHECK: [[OMP_END]]