diff options
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGExpr.cpp | 3 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGExprComplex.cpp | 7 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGExprScalar.cpp | 4 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 263 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 57 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 26 |
6 files changed, 357 insertions, 3 deletions
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index c729037852e..e43ed5030bc 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4717,6 +4717,9 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { if (RV.isScalar()) EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc()); EmitStoreThroughLValue(RV, LV); + if (getLangOpts().OpenMP) + CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this, + E->getLHS()); return LV; } diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 6b119697715..f7a4e9e9471 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "clang/AST/StmtVisitor.h" @@ -1136,7 +1137,11 @@ ComplexPairTy CodeGenFunction::EmitLoadOfComplex(LValue src, LValue CodeGenFunction::EmitComplexAssignmentLValue(const BinaryOperator *E) { assert(E->getOpcode() == BO_Assign); ComplexPairTy Val; // ignored - return ComplexExprEmitter(*this).EmitBinAssignLValue(E, Val); + LValue LVal = ComplexExprEmitter(*this).EmitBinAssignLValue(E, Val); + if (getLangOpts().OpenMP) + CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this, + E->getLHS()); + return LVal; } typedef ComplexPairTy (ComplexExprEmitter::*CompoundFunc)( diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 12bf37e5343..d759d3682ce 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -14,6 +14,7 @@ #include "CGCleanup.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "CGOpenMPRuntime.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" @@ -2997,6 +2998,9 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( else 
CGF.EmitStoreThroughLValue(RValue::get(Result), LHSLV); + if (CGF.getLangOpts().OpenMP) + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, + E->getLHS()); return LHSLV; } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 59f352dcd4c..735cacf0b7d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -19,6 +19,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/AST/StmtVisitor.h" #include "clang/Basic/BitmaskEnum.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/ArrayRef.h" @@ -11401,6 +11402,268 @@ bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); } +CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( + CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) + : CGM(CGF.CGM), + NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), + [](const OMPLastprivateClause *C) { + return C->getKind() == + OMPC_LASTPRIVATE_conditional; + })) { + assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); + if (!NeedToPush) + return; + LastprivateConditionalData &Data = + CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); + for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { + if (C->getKind() != OMPC_LASTPRIVATE_conditional) + continue; + + for (const Expr *Ref : C->varlists()) { + Data.DeclToUniqeName.try_emplace( + cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), + generateUniqueName(CGM, "pl_cond", Ref)); + } + } + Data.IVLVal = IVLVal; + // In simd only mode or for simd directives no need to generate threadprivate + // references for the loop iteration counter, we can use the original one + // since outlining cannot happen in simd regions. 
+ if (CGF.getLangOpts().OpenMPSimd || + isOpenMPSimdDirective(S.getDirectiveKind())) { + Data.UseOriginalIV = true; + return; + } + llvm::SmallString<16> Buffer; + llvm::raw_svector_ostream OS(Buffer); + PresumedLoc PLoc = + CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc()); + assert(PLoc.isValid() && "Source location is expected to be always valid."); + + llvm::sys::fs::UniqueID ID; + if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) + CGM.getDiags().Report(diag::err_cannot_open_file) + << PLoc.getFilename() << EC.message(); + OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_" + << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv"; + Data.IVName = OS.str(); + + // Global loop counter. Required to handle inner parallel-for regions. + // global_iv = &iv; + QualType PtrIVTy = CGM.getContext().getPointerType(IVLVal.getType()); + Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( + CGF, PtrIVTy, Data.IVName); + LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, PtrIVTy); + CGF.EmitStoreOfScalar(IVLVal.getPointer(CGF), GlobIVLVal); +} + +CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { + if (!NeedToPush) + return; + CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); +} + +namespace { +/// Checks if the lastprivate conditional variable is referenced in LHS. 
+class LastprivateConditionalRefChecker final + : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { + CodeGenFunction &CGF; + ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; + const Expr *FoundE = nullptr; + const Decl *FoundD = nullptr; + StringRef UniqueDeclName; + LValue IVLVal; + StringRef IVName; + SourceLocation Loc; + bool UseOriginalIV = false; + +public: + bool VisitDeclRefExpr(const DeclRefExpr *E) { + for (const CGOpenMPRuntime::LastprivateConditionalData &D : + llvm::reverse(LPM)) { + auto It = D.DeclToUniqeName.find(E->getDecl()); + if (It == D.DeclToUniqeName.end()) + continue; + FoundE = E; + FoundD = E->getDecl()->getCanonicalDecl(); + UniqueDeclName = It->getSecond(); + IVLVal = D.IVLVal; + IVName = D.IVName; + UseOriginalIV = D.UseOriginalIV; + break; + } + return FoundE == E; + } + bool VisitMemberExpr(const MemberExpr *E) { + if (!CGF.IsWrappedCXXThis(E->getBase())) + return false; + for (const CGOpenMPRuntime::LastprivateConditionalData &D : + llvm::reverse(LPM)) { + auto It = D.DeclToUniqeName.find(E->getMemberDecl()); + if (It == D.DeclToUniqeName.end()) + continue; + FoundE = E; + FoundD = E->getMemberDecl()->getCanonicalDecl(); + UniqueDeclName = It->getSecond(); + IVLVal = D.IVLVal; + IVName = D.IVName; + UseOriginalIV = D.UseOriginalIV; + break; + } + return FoundE == E; + } + bool VisitStmt(const Stmt *S) { + for (const Stmt *Child : S->children()) { + if (!Child) + continue; + if (const auto *E = dyn_cast<Expr>(Child)) + if (!E->isGLValue()) + continue; + if (Visit(Child)) + return true; + } + return false; + } + explicit LastprivateConditionalRefChecker( + CodeGenFunction &CGF, + ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) + : CGF(CGF), LPM(LPM) {} + std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool> + getFoundData() const { + return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, + UseOriginalIV); + } +}; +} // namespace + +void 
CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, + const Expr *LHS) { + if (CGF.getLangOpts().OpenMP < 50) + return; + LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack); + if (!Checker.Visit(LHS)) + return; + const Expr *FoundE; + const Decl *FoundD; + StringRef UniqueDeclName; + LValue IVLVal; + StringRef IVName; + bool UseOriginalIV; + std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) = + Checker.getFoundData(); + + // Last updated loop counter for the lastprivate conditional var. + // int<xx> last_iv = 0; + llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); + llvm::Constant *LastIV = + getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv"); + cast<llvm::GlobalVariable>(LastIV)->setAlignment( + IVLVal.getAlignment().getAsAlign()); + LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); + + // Private address of the lastprivate conditional in the current context. + // priv_a + LValue LVal = CGF.EmitLValue(FoundE); + // Last value of the lastprivate conditional. + // decltype(priv_a) last_a; + llvm::Constant *Last = getOrCreateInternalVariable( + LVal.getAddress(CGF).getElementType(), UniqueDeclName); + cast<llvm::GlobalVariable>(Last)->setAlignment( + LVal.getAlignment().getAsAlign()); + LValue LastLVal = + CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); + + // Global loop counter. Required to handle inner parallel-for regions. 
+ // global_iv + if (!UseOriginalIV) { + QualType PtrIVTy = CGM.getContext().getPointerType(IVLVal.getType()); + Address IVAddr = getAddrOfArtificialThreadPrivate(CGF, PtrIVTy, IVName); + IVLVal = + CGF.EmitLoadOfPointerLValue(IVAddr, PtrIVTy->castAs<PointerType>()); + } + llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc()); + + // #pragma omp critical(a) + // if (last_iv <= iv) { + // last_iv = iv; + // last_a = priv_a; + // } + auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, + FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + llvm::Value *LastIVVal = + CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc()); + // (last_iv <= global_iv) ? Check if the variable is updated and store new + // value in global var. + llvm::Value *CmpRes; + if (IVLVal.getType()->isSignedIntegerType()) { + CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); + } else { + assert(IVLVal.getType()->isUnsignedIntegerType() && + "Loop iteration variable must be integer."); + CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); + } + llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); + CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); + // { + CGF.EmitBlock(ThenBB); + + // last_iv = global_iv; + CGF.EmitStoreOfScalar(IVVal, LastIVLVal); + + // last_a = priv_a; + switch (CGF.getEvaluationKind(LVal.getType())) { + case TEK_Scalar: { + llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc()); + CGF.EmitStoreOfScalar(PrivVal, LastLVal); + break; + } + case TEK_Complex: { + CodeGenFunction::ComplexPairTy PrivVal = + CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc()); + CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); + break; + } + case TEK_Aggregate: + llvm_unreachable( + "Aggregates are not supported in lastprivate conditional."); + } + // } + CGF.EmitBranch(ExitBB); + // There is no need to emit line number for 
unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ExitBB, /*IsFinished=*/true); + }; + + if (CGM.getLangOpts().OpenMPSimd) { + // Do not emit as a critical region as no parallel region could be emitted. + RegionCodeGenTy ThenRCG(CodeGen); + ThenRCG(CGF); + } else { + emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc()); + } +} + +void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( + CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, + SourceLocation Loc) { + if (CGF.getLangOpts().OpenMP < 50) + return; + auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD); + assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() && + "Unknown lastprivate conditional variable."); + StringRef UniqueName = It->getSecond(); + llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); + // The variable was not updated in the region - exit. + if (!GV) + return; + LValue LPLVal = CGF.MakeAddrLValue( + GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); + llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); + CGF.EmitStoreOfScalar(Res, PrivLVal); +} + llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index bcaa06aab54..2a6a6b9e19c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -222,6 +222,33 @@ public: ~NontemporalDeclsRAII(); }; + /// Maps the expression for the lastprivate variable to the global copy used + /// to store new value because original variables are not mapped in inner + /// parallel regions. Only private copies are captured but we need also to + /// store private copy in shared address. 
+ /// Also, stores the expression for the private loop counter and its + /// threadprivate name. + struct LastprivateConditionalData { + llvm::SmallDenseMap<CanonicalDeclPtr<const Decl>, SmallString<16>> + DeclToUniqeName; + LValue IVLVal; + SmallString<16> IVName; + /// True if original lvalue for loop counter can be used in codegen (simd + /// region or simd only mode) and no need to create threadprivate + /// references. + bool UseOriginalIV = false; + }; + /// Manages list of lastprivate conditional decls for the specified directive. + class LastprivateConditionalRAII { + CodeGenModule &CGM; + const bool NeedToPush; + + public: + LastprivateConditionalRAII(CodeGenFunction &CGF, + const OMPExecutableDirective &S, LValue IVLVal); + ~LastprivateConditionalRAII(); + }; + protected: CodeGenModule &CGM; StringRef FirstSeparator, Separator; @@ -666,6 +693,11 @@ private: /// The set is the union of all current stack elements. llvm::SmallVector<NontemporalDeclsSet, 4> NontemporalDeclsStack; + /// Stack for list of addresses of declarations in current context marked as + /// lastprivate conditional. The set is the union of all current stack + /// elements. + llvm::SmallVector<LastprivateConditionalData, 4> LastprivateConditionalStack; + /// Flag for keeping track of whether a requires unified_shared_memory /// directive is present. bool HasRequiresUnifiedSharedMemory = false; @@ -1683,6 +1715,31 @@ public: /// Checks if the \p VD variable is marked as nontemporal declaration in /// current context. bool isNontemporalDecl(const ValueDecl *VD) const; + + /// Checks if the provided \p LHS is lastprivate conditional and emits the + /// code to update the value of the original variable. + /// \code + /// lastprivate(conditional: a) + /// ... 
+ /// <type> a; + /// lp_a = ...; + /// #pragma omp critical(a) + /// if (last_iv_a <= iv) { + /// last_iv_a = iv; + /// global_a = lp_a; + /// } + /// \endcode + virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, + const Expr *LHS); + + /// Gets the address of the global copy used for lastprivate conditional + /// update, if any. + /// \param PrivLVal LValue for the private copy. + /// \param VD Original lastprivate declaration. + virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, + LValue PrivLVal, + const VarDecl *VD, + SourceLocation Loc); }; /// Class supports emission of SIMD-only code. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index a38a79b6454..cac0e7d4ed6 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1045,6 +1045,18 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( llvm::BasicBlock *ThenBB = nullptr; llvm::BasicBlock *DoneBB = nullptr; if (IsLastIterCond) { + // Emit implicit barrier if at least one lastprivate conditional is found + // and this is not a simd mode. + if (!getLangOpts().OpenMPSimd && + llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(), + [](const OMPLastprivateClause *C) { + return C->getKind() == OMPC_LASTPRIVATE_conditional; + })) { + CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(), + OMPD_unknown, + /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } ThenBB = createBasicBlock(".omp.lastprivate.then"); DoneBB = createBasicBlock(".omp.lastprivate.done"); Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB); @@ -1083,14 +1095,19 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); const auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); - // Get the address of the original variable. - Address OriginalAddr = GetAddrOfLocalVar(DestVD); // Get the address of the private variable. 
Address PrivateAddr = GetAddrOfLocalVar(PrivateVD); if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>()) PrivateAddr = Address(Builder.CreateLoad(PrivateAddr), getNaturalTypeAlignment(RefTy->getPointeeType())); + // Store the last value to the private copy in the last iteration. + if (C->getKind() == OMPC_LASTPRIVATE_conditional) + CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate( + *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD, + (*IRef)->getExprLoc()); + // Get the address of the original variable. + Address OriginalAddr = GetAddrOfLocalVar(DestVD); EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp); } ++IRef; @@ -1974,6 +1991,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitOMPLinearClause(S, LoopScope); CGF.EmitOMPPrivateClause(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); + CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( + CGF, S, CGF.EmitLValue(S.getIterationVariable())); bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) @@ -2546,6 +2565,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( /*ForceSimpleCall=*/true); } EmitOMPPrivateClause(S, LoopScope); + CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( + *this, S, EmitLValue(S.getIterationVariable())); HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); EmitOMPReductionClauseInit(S, LoopScope); EmitOMPPrivateLoopCounters(S, LoopScope); @@ -2856,6 +2877,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { /*ForceSimpleCall=*/true); } CGF.EmitOMPPrivateClause(S, LoopScope); + CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); (void)LoopScope.Privatize(); |