summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2018-07-09 17:43:58 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2018-07-09 17:43:58 +0000
commitb99dcb5f31049381a7ca29def6edeb48da3217b0 (patch)
treea82991801b46b40459cac79a512640dc455708ad
parent47170b3153a4a494bbfd76d31e2fbb754dda34ec (diff)
downloadbcm5719-llvm-b99dcb5f31049381a7ca29def6edeb48da3217b0.tar.gz
bcm5719-llvm-b99dcb5f31049381a7ca29def6edeb48da3217b0.zip
[OPENMP, NVPTX] Do not globalize local variables in parallel regions.
In generic data-sharing mode we are allowed to not globalize local variables that escape their declaration context iff they are declared inside of the parallel region. We can do this because L2 parallel regions are executed sequentially and, thus, we do not need to put shared local variables in the global memory. llvm-svn: 336567
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp13
-rw-r--r--clang/test/OpenMP/declare_target_codegen_globalization.cpp7
-rw-r--r--clang/test/OpenMP/nvptx_data_sharing.cpp7
-rw-r--r--clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp4
-rw-r--r--clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp31
5 files changed, 33 insertions, 29 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 8cf5bb2f44b..0ef093fd057 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -1554,29 +1554,22 @@ void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
- SourceLocation Loc = D.getLocStart();
-
// Emit target region as a standalone region.
class NVPTXPrePostActionTy : public PrePostActionTy {
- SourceLocation &Loc;
bool &IsInParallelRegion;
bool PrevIsInParallelRegion;
public:
- NVPTXPrePostActionTy(SourceLocation &Loc, bool &IsInParallelRegion)
- : Loc(Loc), IsInParallelRegion(IsInParallelRegion) {}
+ NVPTXPrePostActionTy(bool &IsInParallelRegion)
+ : IsInParallelRegion(IsInParallelRegion) {}
void Enter(CodeGenFunction &CGF) override {
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitGenericVarsProlog(CGF, Loc);
PrevIsInParallelRegion = IsInParallelRegion;
IsInParallelRegion = true;
}
void Exit(CodeGenFunction &CGF) override {
IsInParallelRegion = PrevIsInParallelRegion;
- static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
- .emitGenericVarsEpilog(CGF);
}
- } Action(Loc, IsInParallelRegion);
+ } Action(IsInParallelRegion);
CodeGen.setAction(Action);
bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion;
IsInTargetMasterThreadRegion = false;
diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp
index c8a0f7a3e3b..7ef4f8af3de 100644
--- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp
+++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp
@@ -24,12 +24,11 @@ int maini1() {
// parallel region
// CHECK: define {{.*}}void @{{.*}}(i32* noalias {{.*}}, i32* noalias {{.*}}, i32* dereferenceable{{.*}})
-// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
-// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
-// CHECK: [[B_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
+// CHECK: [[B_ADDR:%.+]] = alloca i32,
// CHECK: call {{.*}}[[FOO:@.*foo.*]](i32* dereferenceable{{.*}} [[B_ADDR]])
// CHECK: call {{.*}}[[BAR:@.*bar.*]]()
-// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
// CHECK: ret void
// CHECK: define {{.*}}[[FOO]](i32* dereferenceable{{.*}})
diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp
index 634681b2f78..eb0acde2998 100644
--- a/clang/test/OpenMP/nvptx_data_sharing.cpp
+++ b/clang/test/OpenMP/nvptx_data_sharing.cpp
@@ -83,11 +83,10 @@ void test_ds(){
/// outlined function for the second parallel region ///
// CK1: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.+}}, i32* dereferenceable{{.+}})
-// CK1: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
-// CK1: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_TY:%.+]]*
-// CK1: [[C_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i32 0, i32 0
+// CK1-NOT: call i8* @__kmpc_data_sharing_push_stack(
+// CK1: [[C_ADDR:%.+]] = alloca i32,
// CK1: store i32* [[C_ADDR]], i32** %
-// CK1: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CK1i-NOT: call void @__kmpc_data_sharing_pop_stack(
/// ========= In the data sharing wrapper function ========= ///
diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
index 5dc440e46de..2b18f6d3f9b 100644
--- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
@@ -44,8 +44,8 @@ int main(int argc, char **argv) {
// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]])
// CHECK: define internal void [[PARALLEL]](
-// CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
+// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
-// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]])
+// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
#endif
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
index 2e712e2d51d..d75a1021090 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
@@ -34,20 +34,33 @@ int main(int argc, char **argv) {
// CHECK: call void @__kmpc_spmd_kernel_deinit()
-// CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i32* dereferenceable{{.*}})
-// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 8, i16 0)
-// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_TY:%.+]]*
-// CHECK: [[I:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
-// CHECK: [[ARGC_VAL:%.+]] = load i32, i32* %
-// CHECK: [[ARGC:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
-// CHECK: store i32 [[ARGC_VAL]], i32* [[ARGC]],
+// CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} [[ARGC:%.+]], i32* dereferenceable{{.*}})
+// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack(
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[ARGC_ADDR:%.+]] = alloca i{{32|64}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: alloca i{{[0-9]+}},
+// CHECK: [[I:%.+]] = alloca i32,
+// CHECK-32: store i32 [[ARGC]], i32* [[ARGC_ADDR]],
+// CHECK-64: store i{{64|32}} [[ARGC]], i{{64|32}}* [[ARGC_ADDR]],
+// CHECK-64: [[ARGC:%.+]] = bitcast i64* [[ARGC_ADDR]] to i32*
// CHECK: call void @__kmpc_for_static_init_4(
// CHECK: call i32 [[FOO:@.+foo.+]](i32* [[I]])
// CHECK: call i32 [[FOO]](i32* %{{.+}})
-// CHECK: call i32 [[FOO]](i32* [[ARGC]])
+// CHECK-32: call i32 [[FOO]](i32* [[ARGC_ADDR]])
+// CHECK-64: call i32 [[FOO]](i32* [[ARGC]])
// CHECK: call void @__kmpc_for_static_fini(
-// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack(
#endif
OpenPOWER on IntegriCloud