summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2018-10-05 15:08:53 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2018-10-05 15:08:53 +0000
commitfd006c44eeffe1f018f7fda13eacb67a6606ec9c (patch)
treef109d0337a0888e5048c30920ed8e1cec54a72bd
parent00216bca66242fadfd22165d4efd1479d1d2ae82 (diff)
downloadbcm5719-llvm-fd006c44eeffe1f018f7fda13eacb67a6606ec9c.tar.gz
bcm5719-llvm-fd006c44eeffe1f018f7fda13eacb67a6606ec9c.zip
[OPENMP] Fix emission of the __kmpc_global_thread_num.
Fixed emission of the __kmpc_global_thread_num() so that it is not messed up with alloca instructions anymore. Plus, fixes emission of the __kmpc_global_thread_num() functions in the target outlined regions so that they are not called before runtime is initialized. llvm-svn: 343856
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp39
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.h6
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp3
-rw-r--r--clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp2
-rw-r--r--clang/test/OpenMP/parallel_if_codegen.cpp4
-rw-r--r--clang/test/OpenMP/single_codegen.cpp6
-rw-r--r--clang/test/OpenMP/single_firstprivate_codegen.cpp2
-rw-r--r--clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp2
-rw-r--r--clang/test/OpenMP/taskloop_reduction_codegen.cpp2
-rw-r--r--clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp2
10 files changed, 54 insertions, 14 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index d421729faa2..c00f66dd030 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1485,6 +1485,31 @@ Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
return Address(Entry, Align);
}
+void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
+ bool AtCurrentPoint) {
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
+
+ llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
+ if (AtCurrentPoint) {
+ Elem.second.ServiceInsertPt = new llvm::BitCastInst(
+ Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
+ } else {
+ Elem.second.ServiceInsertPt =
+ new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
+ Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
+ }
+}
+
+void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ if (Elem.second.ServiceInsertPt) {
+ llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
+ Elem.second.ServiceInsertPt = nullptr;
+ Ptr->eraseFromParent();
+ }
+}
+
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
SourceLocation Loc,
unsigned Flags) {
@@ -1511,8 +1536,10 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
Elem.second.DebugLoc = AI.getPointer();
LocValue = AI;
+ if (!Elem.second.ServiceInsertPt)
+ setLocThreadIdInsertPt(CGF);
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
- CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+ CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
CGF.getTypeSize(IdentQTy));
}
@@ -1582,21 +1609,25 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
// kmpc_global_thread_num(ident_t *loc).
// Generate thread id value and cache this value for use across the
// function.
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ if (!Elem.second.ServiceInsertPt)
+ setLocThreadIdInsertPt(CGF);
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
- CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+ CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
llvm::CallInst *Call = CGF.Builder.CreateCall(
createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
emitUpdateLocation(CGF, Loc));
Call->setCallingConv(CGF.getRuntimeCC());
- auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
Elem.second.ThreadID = Call;
return Call;
}
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
- if (OpenMPLocThreadIDMap.count(CGF.CurFn))
+ if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
+ clearLocThreadIdInsertPt(CGF);
OpenMPLocThreadIDMap.erase(CGF.CurFn);
+ }
if (FunctionUDRMap.count(CGF.CurFn) > 0) {
for(auto *D : FunctionUDRMap[CGF.CurFn])
UDRMap.erase(D);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 35f75a9ec08..3fb1a1538fa 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -278,6 +278,10 @@ protected:
/// stored.
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
+ void setLocThreadIdInsertPt(CodeGenFunction &CGF,
+ bool AtCurrentPoint = false);
+ void clearLocThreadIdInsertPt(CodeGenFunction &CGF);
+
private:
/// Default const ident_t object used for initialization of all other
/// ident_t objects.
@@ -300,6 +304,8 @@ private:
struct DebugLocThreadIdTy {
llvm::Value *DebugLoc;
llvm::Value *ThreadID;
+ /// Insert point for the service instructions.
+ llvm::AssertingVH<llvm::Instruction> ServiceInsertPt = nullptr;
};
/// Map of local debug location, ThreadId and functions.
typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy>
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index a923232e817..f0f0a735f8a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -1197,8 +1197,11 @@ void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D,
: RT(RT), EST(EST), D(D) {}
void Enter(CodeGenFunction &CGF) override {
RT.emitSPMDEntryHeader(CGF, EST, D);
+ // Skip target region initialization.
+ RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
}
void Exit(CodeGenFunction &CGF) override {
+ RT.clearLocThreadIdInsertPt(CGF);
RT.emitSPMDEntryFooter(CGF, EST);
}
} Action(*this, EST, D);
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
index d75a1021090..94644753253 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
@@ -22,8 +22,8 @@ int main(int argc, char **argv) {
// CHECK: @__omp_offloading_{{.*}}_main_l16_exec_mode = weak constant i8 0
// CHECK: define weak void @__omp_offloading_{{.*}}_main_l16(i{{64|32}} %{{[^,].*}}, i32* dereferenceable{{[^,]*}}, i{{64|32}} %{{[^,)]*}})
-// CHECK: [[TID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @
// CHECK: call void @__kmpc_spmd_kernel_init(
+// CHECK: [[TID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call void [[PARALLEL:@.+]](i32* %{{.*}}, i32* %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.*}}, i{{64|32}} %{{.*}}, i32* %{{.*}})
diff --git a/clang/test/OpenMP/parallel_if_codegen.cpp b/clang/test/OpenMP/parallel_if_codegen.cpp
index 49e4bd402ce..ec9fc01561b 100644
--- a/clang/test/OpenMP/parallel_if_codegen.cpp
+++ b/clang/test/OpenMP/parallel_if_codegen.cpp
@@ -55,9 +55,9 @@ int tmain(T Arg) {
// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
int main() {
-// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
// CHECK: store i32 0, i32* [[ZERO_ADDR2:%.+]],
// CHECK: store i32 0, i32* [[ZERO_ADDR1:%.+]],
+// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN4:@.+]] to void
#pragma omp parallel if (true)
fn4();
@@ -96,9 +96,9 @@ int main() {
// CHECK: ret void
// CHECK-LABEL: define {{.+}} @{{.+}}tmain
-// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
// CHECK: store i32 0, i32* [[ZERO_ADDR2:%.+]],
// CHECK: store i32 0, i32* [[ZERO_ADDR1:%.+]],
+// CHECK: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num(
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{.+}} 0, void {{.+}}* [[CAP_FN1:@.+]] to void
// CHECK: call {{.*}}void @__kmpc_serialized_parallel(%{{.+}}* @{{.+}}, i32 [[GTID]])
// CHECK: call void [[CAP_FN2:@.+]](i32* [[ZERO_ADDR1]], i32* [[ZERO_ADDR1]])
diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp
index a5426bf483e..df5b2ca0e67 100644
--- a/clang/test/OpenMP/single_codegen.cpp
+++ b/clang/test/OpenMP/single_codegen.cpp
@@ -74,9 +74,12 @@ struct SST {
// CHECK-LABEL: @main
// TERM_DEBUG-LABEL: @main
int main() {
+ // CHECK: alloca i32
// CHECK-DAG: [[A_ADDR:%.+]] = alloca i8
// CHECK-DAG: [[A2_ADDR:%.+]] = alloca [2 x i8]
// CHECK-DAG: [[C_ADDR:%.+]] = alloca [[TEST_CLASS_TY]]
+ // CHECK-DAG: [[DID_IT:%.+]] = alloca i32,
+ // CHECK-DAG: [[COPY_LIST:%.+]] = alloca [5 x i8*],
char a;
char a2[2];
TestClass &c = tc;
@@ -84,9 +87,6 @@ int main() {
SS ss(c.a);
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
-// CHECK-DAG: [[DID_IT:%.+]] = alloca i32,
-// CHECK-DAG: [[COPY_LIST:%.+]] = alloca [5 x i8*],
-
// CHECK: [[RES:%.+]] = call i32 @__kmpc_single([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// CHECK-NEXT: [[IS_SINGLE:%.+]] = icmp ne i32 [[RES]], 0
// CHECK-NEXT: br i1 [[IS_SINGLE]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
diff --git a/clang/test/OpenMP/single_firstprivate_codegen.cpp b/clang/test/OpenMP/single_firstprivate_codegen.cpp
index a59e1a775a0..3b738253904 100644
--- a/clang/test/OpenMP/single_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/single_firstprivate_codegen.cpp
@@ -178,13 +178,13 @@ int main() {
// CHECK: define {{.*}}i{{[0-9]+}} @main()
// CHECK: alloca i{{[0-9]+}},
-// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
// CHECK: [[SIVAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(
// CHECK: call i32 @__kmpc_single(
// firstprivate t_var(t_var)
// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR]],
diff --git a/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp b/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp
index 90b03c653ef..02a5ba664e5 100644
--- a/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp
@@ -43,12 +43,12 @@ int main(int argc, char **argv) {
// CHECK: [[A:%.+]] = alloca i32,
// CHECK: [[B:%.+]] = alloca float,
// CHECK: [[C:%.+]] = alloca [5 x %struct.S],
-// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
// CHECK: [[RD_IN1:%.+]] = alloca [3 x [[T1:%[^,]+]]],
// CHECK: [[TD1:%.+]] = alloca i8*,
// CHECK: [[RD_IN2:%.+]] = alloca [2 x [[T2:%[^,]+]]],
// CHECK: [[TD2:%.+]] = alloca i8*,
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
// CHECK: [[VLA:%.+]] = alloca i16, i64 [[VLA_SIZE:%[^,]+]],
// CHECK: call void @__kmpc_taskgroup(%struct.ident_t* {{[^,]+}}, i32 [[GTID]])
diff --git a/clang/test/OpenMP/taskloop_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_reduction_codegen.cpp
index 65bc50a4d25..3a3434a4540 100644
--- a/clang/test/OpenMP/taskloop_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_reduction_codegen.cpp
@@ -52,11 +52,11 @@ sum = 0.0;
// CHECK: [[C:%.*]] = alloca [100 x %struct.S],
// CHECK: [[D:%.*]] = alloca float*,
// CHECK: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]],
-// CHECK: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
// CHECK: [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t],
// CHECK: alloca i32,
// CHECK: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32,
// CHECK: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32,
+// CHECK: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
// CHECK: store i32 0, i32* [[RETVAL]],
// CHECK: store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]],
// CHECK: store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]],
diff --git a/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp
index 6c34e34ca58..d2c4b8b3113 100644
--- a/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp
@@ -52,11 +52,11 @@ sum = 0.0;
// CHECK: [[C:%.*]] = alloca [100 x %struct.S],
// CHECK: [[D:%.*]] = alloca float*,
// CHECK: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]],
-// CHECK: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
// CHECK: [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t],
// CHECK: alloca i32,
// CHECK: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32,
// CHECK: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32,
+// CHECK: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
// CHECK: store i32 0, i32* [[RETVAL]],
// CHECK: store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]],
// CHECK: store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]],
OpenPOWER on IntegriCloud