diff options
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 98 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 12 | ||||
-rw-r--r-- | clang/test/OpenMP/nvptx_parallel_codegen.cpp | 9 |
3 files changed, 70 insertions, 49 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 9452bdea4c7..e3cec13f7d5 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -184,9 +184,10 @@ class CheckVarsEscapingDeclContext final llvm::SetVector<const ValueDecl *> EscapedDecls; llvm::SetVector<const ValueDecl *> EscapedVariableLengthDecls; llvm::SmallPtrSet<const Decl *, 4> EscapedParameters; - bool AllEscaped = false; RecordDecl *GlobalizedRD = nullptr; llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields; + bool AllEscaped = false; + bool IsForParallelRegion = false; static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> isDeclareTargetDeclaration(const ValueDecl *VD) { @@ -207,23 +208,32 @@ class CheckVarsEscapingDeclContext final // Variables captured by value must be globalized. if (auto *CSI = CGF.CapturedStmtInfo) { if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) { - if (!FD->hasAttrs()) - return; - const auto *Attr = FD->getAttr<OMPCaptureKindAttr>(); - if (!Attr) - return; - if (!isOpenMPPrivate( - static_cast<OpenMPClauseKind>(Attr->getCaptureKind())) || - Attr->getCaptureKind() == OMPC_map) - return; - if (FD->getType()->isReferenceType()) + // Check if need to capture the variable that was already captured by + // value in the outer region. + if (!IsForParallelRegion) { + if (!FD->hasAttrs()) + return; + const auto *Attr = FD->getAttr<OMPCaptureKindAttr>(); + if (!Attr) + return; + if (!isOpenMPPrivate( + static_cast<OpenMPClauseKind>(Attr->getCaptureKind())) || + Attr->getCaptureKind() == OMPC_map) + return; + } + if (!FD->getType()->isReferenceType()) { + assert(!VD->getType()->isVariablyModifiedType() && + "Parameter captured by value with variably modified type"); + EscapedParameters.insert(VD); + } else if (!IsForParallelRegion) { return; - assert(!VD->getType()->isVariablyModifiedType() && - "Parameter captured by value with variably modified type"); - EscapedParameters.insert(VD); + } } - } else if (VD->getType()->isReferenceType()) - // Do not globalize variables with reference or pointer type. + } + if ((!CGF.CapturedStmtInfo || + (IsForParallelRegion && CGF.CapturedStmtInfo)) && + VD->getType()->isReferenceType()) + // Do not globalize variables with reference type. return; if (VD->getType()->isVariablyModifiedType()) EscapedVariableLengthDecls.insert(VD); @@ -243,15 +253,18 @@ class CheckVarsEscapingDeclContext final } } } - void VisitOpenMPCapturedStmt(const CapturedStmt *S) { + void VisitOpenMPCapturedStmt(const CapturedStmt *S, bool IsParallelRegion) { if (!S) return; for (const CapturedStmt::Capture &C : S->captures()) { if (C.capturesVariable() && !C.capturesVariableByCopy()) { const ValueDecl *VD = C.getCapturedVar(); + bool SavedIsParallelRegion = IsForParallelRegion; + IsForParallelRegion = IsParallelRegion; markAsEscaped(VD); if (isa<OMPCapturedExprDecl>(VD)) VisitValueDecl(VD); + IsForParallelRegion = SavedIsParallelRegion; } } } @@ -316,20 +329,19 @@ public: void VisitOMPExecutableDirective(const OMPExecutableDirective *D) { if (!D) return; - if (D->hasAssociatedStmt()) { - if (const auto *S = - dyn_cast_or_null<CapturedStmt>(D->getAssociatedStmt())) { - // Do not analyze directives that do not actually require capturing, - // like `omp for` or `omp simd` directives. - llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; - getOpenMPCaptureRegions(CaptureRegions, D->getDirectiveKind()); - if (CaptureRegions.size() == 1 && - CaptureRegions.back() == OMPD_unknown) { - VisitStmt(S->getCapturedStmt()); - return; - } - VisitOpenMPCapturedStmt(S); + if (!D->hasAssociatedStmt()) + return; + if (const auto *S = + dyn_cast_or_null<CapturedStmt>(D->getAssociatedStmt())) { + // Do not analyze directives that do not actually require capturing, + // like `omp for` or `omp simd` directives. + llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; + getOpenMPCaptureRegions(CaptureRegions, D->getDirectiveKind()); + if (CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown) { + VisitStmt(S->getCapturedStmt()); + return; } + VisitOpenMPCapturedStmt(S, CaptureRegions.back() == OMPD_parallel); } } void VisitCapturedStmt(const CapturedStmt *S) { @@ -551,9 +563,9 @@ static void syncParallelThreads(CodeGenFunction &CGF, llvm::Value *NumThreads) { /// CTA. The threads in the last warp are reserved for master execution. /// For the 'spmd' execution mode, all threads in a CTA are part of the team. static llvm::Value *getThreadLimit(CodeGenFunction &CGF, - bool IsInSpmdExecutionMode = false) { + bool IsInSPMDExecutionMode = false) { CGBuilderTy &Bld = CGF.Builder; - return IsInSpmdExecutionMode + return IsInSPMDExecutionMode ? getNVPTXNumThreads(CGF) : Bld.CreateNUWSub(getNVPTXNumThreads(CGF), getNVPTXWarpSize(CGF), "thread_limit"); @@ -930,7 +942,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDEntryFooter(CodeGenFunction &CGF, EST.ExitBB = nullptr; } -void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D, +void CGOpenMPRuntimeNVPTX::emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, @@ -951,10 +963,10 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D, const OMPExecutableDirective &D) : RT(RT), EST(EST), D(D) {} void Enter(CodeGenFunction &CGF) override { - RT.emitSpmdEntryHeader(CGF, EST, D); + RT.emitSPMDEntryHeader(CGF, EST, D); } void Exit(CodeGenFunction &CGF) override { - RT.emitSpmdEntryFooter(CGF, EST); + RT.emitSPMDEntryFooter(CGF, EST); } } Action(*this, EST, D); CodeGen.setAction(Action); @@ -962,7 +974,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKernel(const OMPExecutableDirective &D, IsOffloadEntry, CodeGen); } -void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader( +void CGOpenMPRuntimeNVPTX::emitSPMDEntryHeader( CodeGenFunction &CGF, EntryFunctionState &EST, const OMPExecutableDirective &D) { CGBuilderTy &Bld = CGF.Builder; @@ -974,7 +986,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader( // Initialize the OMP state in the runtime; called by all active threads. // TODO: Set RequiresOMPRuntime and RequiresDataSharing parameters // based on code analysis of the target region. - llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSpmdExecutionMode=*/true), + llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true), /*RequiresOMPRuntime=*/Bld.getInt16(1), /*RequiresDataSharing=*/Bld.getInt16(1)}; CGF.EmitRuntimeCall( @@ -986,7 +998,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntryHeader( IsInTargetMasterThreadRegion = true; } -void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF, +void CGOpenMPRuntimeNVPTX::emitSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST) { IsInTargetMasterThreadRegion = false; if (!CGF.HaveInsertPoint()) @@ -1465,7 +1477,7 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D); if (Mode) - emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, + emitSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); else emitNonSPMDKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, @@ -1483,7 +1495,7 @@ CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc) { - // Do nothing in case of Spmd mode and L0 parallel. + // Do nothing in case of SPMD mode and L0 parallel. if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) return; @@ -1493,7 +1505,7 @@ void CGOpenMPRuntimeNVPTX::emitProcBindClause(CodeGenFunction &CGF, void CGOpenMPRuntimeNVPTX::emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) { - // Do nothing in case of Spmd mode and L0 parallel. + // Do nothing in case of SPMD mode and L0 parallel. if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) return; @@ -1718,7 +1730,7 @@ void CGOpenMPRuntimeNVPTX::emitParallelCall( return; if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) - emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); + emitSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); else emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond); } @@ -1904,7 +1916,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( } } -void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall( +void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { // Just call the outlined function to execute the parallel region. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index c7d647bfdb1..f83e99f8a3b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -77,12 +77,12 @@ private: /// Helper for generic variables globalization epilog. void emitGenericVarsEpilog(CodeGenFunction &CGF); - /// Helper for Spmd mode target directive's entry function. - void emitSpmdEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, + /// Helper for SPMD mode target directive's entry function. + void emitSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, const OMPExecutableDirective &D); - /// Signal termination of Spmd mode execution. - void emitSpmdEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); + /// Signal termination of SPMD mode execution. + void emitSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); // // Base class overrides. @@ -119,7 +119,7 @@ private: /// \param CodeGen Object containing the target statements. /// An outlined function may not be an entry if, e.g. the if clause always /// evaluates to false. - void emitSpmdKernel(const OMPExecutableDirective &D, StringRef ParentName, + void emitSPMDKernel(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen); @@ -166,7 +166,7 @@ private: /// \param IfCond Condition in the associated 'if' clause, if it was /// specified, nullptr otherwise. /// - void emitSpmdParallelCall(CodeGenFunction &CGF, SourceLocation Loc, + void emitSPMDParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond); diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index 8f496eb80e4..d1a3104407d 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -315,6 +315,15 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l54}}_worker() // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l54}}( +// CHECK-32: [[A_ADDR:%.+]] = alloca i32, +// CHECK-64: [[A_ADDR:%.+]] = alloca i64, +// CHECK-64: [[CONV:%.+]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK: [[STACK:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0) +// CHECK: [[BC:%.+]] = bitcast i8* [[STACK]] to %struct._globalized_locals_ty* +// CHECK-32: [[A:%.+]] = load i32, i32* [[A_ADDR]], +// CHECK-64: [[A:%.+]] = load i32, i32* [[CONV]], +// CHECK: [[GLOBAL_A_ADDR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[BC]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: store i32 [[A]], i32* [[GLOBAL_A_ADDR]], // CHECK-LABEL: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable{{.*}}) // CHECK: [[CC:%.+]] = alloca i32, |