diff options
5 files changed, 33 insertions, 29 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 8cf5bb2f44b..0ef093fd057 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -1554,29 +1554,22 @@ void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { - SourceLocation Loc = D.getLocStart(); - // Emit target region as a standalone region. class NVPTXPrePostActionTy : public PrePostActionTy { - SourceLocation &Loc; bool &IsInParallelRegion; bool PrevIsInParallelRegion; public: - NVPTXPrePostActionTy(SourceLocation &Loc, bool &IsInParallelRegion) - : Loc(Loc), IsInParallelRegion(IsInParallelRegion) {} + NVPTXPrePostActionTy(bool &IsInParallelRegion) + : IsInParallelRegion(IsInParallelRegion) {} void Enter(CodeGenFunction &CGF) override { - static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) - .emitGenericVarsProlog(CGF, Loc); PrevIsInParallelRegion = IsInParallelRegion; IsInParallelRegion = true; } void Exit(CodeGenFunction &CGF) override { IsInParallelRegion = PrevIsInParallelRegion; - static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime()) - .emitGenericVarsEpilog(CGF); } - } Action(Loc, IsInParallelRegion); + } Action(IsInParallelRegion); CodeGen.setAction(Action); bool PrevIsInTargetMasterThreadRegion = IsInTargetMasterThreadRegion; IsInTargetMasterThreadRegion = false; diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp index c8a0f7a3e3b..7ef4f8af3de 100644 --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -24,12 +24,11 @@ int maini1() { // parallel region // CHECK: define {{.*}}void @{{.*}}(i32* noalias {{.*}}, 
i32* noalias {{.*}}, i32* dereferenceable{{.*}}) -// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0) -// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]* -// CHECK: [[B_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack( +// CHECK: [[B_ADDR:%.+]] = alloca i32, // CHECK: call {{.*}}[[FOO:@.*foo.*]](i32* dereferenceable{{.*}} [[B_ADDR]]) // CHECK: call {{.*}}[[BAR:@.*bar.*]]() -// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]]) +// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack( // CHECK: ret void // CHECK: define {{.*}}[[FOO]](i32* dereferenceable{{.*}}) diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp index 634681b2f78..eb0acde2998 100644 --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -83,11 +83,10 @@ void test_ds(){ /// outlined function for the second parallel region /// // CK1: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.+}}, i32* dereferenceable{{.+}}) -// CK1: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0) -// CK1: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_TY:%.+]]* -// CK1: [[C_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i32 0, i32 0 +// CK1-NOT: call i8* @__kmpc_data_sharing_push_stack( +// CK1: [[C_ADDR:%.+]] = alloca i32, // CK1: store i32* [[C_ADDR]], i32** % -// CK1: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]]) +// CK1-NOT: call void @__kmpc_data_sharing_pop_stack( /// ========= In the data sharing wrapper function ========= /// diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp index 5dc440e46de..2b18f6d3f9b 100644 --- 
a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -44,8 +44,8 @@ int main(int argc, char **argv) { // CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]]) // CHECK: define internal void [[PARALLEL]]( -// CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0) +// CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack( -// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[PTR]]) +// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack( #endif diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp index 2e712e2d51d..d75a1021090 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -34,20 +34,33 @@ int main(int argc, char **argv) { // CHECK: call void @__kmpc_spmd_kernel_deinit() -// CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i32* dereferenceable{{.*}}) -// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 8, i16 0) -// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_TY:%.+]]* -// CHECK: [[I:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[ARGC_VAL:%.+]] = load i32, i32* % -// CHECK: [[ARGC:%.+]] = getelementptr inbounds [[GLOBAL_TY]], [[GLOBAL_TY]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: store i32 [[ARGC_VAL]], i32* [[ARGC]], +// CHECK: define internal void [[PARALLEL]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} %{{.+}}, i{{64|32}} [[ARGC:%.+]], i32* dereferenceable{{.*}}) +// CHECK-NOT: call i8* 
@__kmpc_data_sharing_push_stack( +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[ARGC_ADDR:%.+]] = alloca i{{32|64}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: alloca i{{[0-9]+}}, +// CHECK: [[I:%.+]] = alloca i32, +// CHECK-32: store i32 [[ARGC]], i32* [[ARGC_ADDR]], +// CHECK-64: store i{{64|32}} [[ARGC]], i{{64|32}}* [[ARGC_ADDR]], +// CHECK-64: [[ARGC:%.+]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK: call void @__kmpc_for_static_init_4( // CHECK: call i32 [[FOO:@.+foo.+]](i32* [[I]]) // CHECK: call i32 [[FOO]](i32* %{{.+}}) -// CHECK: call i32 [[FOO]](i32* [[ARGC]]) +// CHECK-32: call i32 [[FOO]](i32* [[ARGC_ADDR]]) +// CHECK-64: call i32 [[FOO]](i32* [[ARGC]]) // CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]]) +// CHECK-NOT: call void @__kmpc_data_sharing_pop_stack( #endif |