Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp |  22
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h   |  40
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp    | 436
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h   |  96
4 files changed, 123 insertions, 471 deletions
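In the diff below, emitForDispatchInit takes an explicit upper bound and an optional chunk instead of a DispatchRTInput struct; the lower bound passed to the dispatch runtime entry point is always 0 and the upper bound is the loop's last-iteration value (see the __kmpc_dispatch_init comment in CGOpenMPRuntime.cpp). As a rough, self-contained sketch of the control flow the emitted code follows -- dispatch_init and dispatch_next are invented single-threaded stand-ins for the calls made via emitForDispatchInit and emitForNext, not the real __kmpc_* API:

#include <algorithm>
#include <cstdint>
#include <cstdio>

namespace {
int32_t Cursor = 0;        // stand-in for the runtime's shared iteration cursor
int32_t LastIteration = 0; // upper bound given to dispatch init
int32_t ChunkSize = 1;     // chunk from the 'schedule' clause (default 1)

// Stand-in for emitForDispatchInit: lower bound 0, upper bound = LastIteration,
// stride 1, chunk defaulted to 1 when the clause gives none.
void dispatch_init(int32_t UB, int32_t Chunk) {
  Cursor = 0;
  LastIteration = UB;
  ChunkSize = Chunk > 0 ? Chunk : 1;
}

// Stand-in for emitForNext: fills [LB, UB] with the next chunk and returns
// false once the iteration space is exhausted.
bool dispatch_next(int32_t &LB, int32_t &UB) {
  if (Cursor > LastIteration)
    return false;
  LB = Cursor;
  UB = std::min(Cursor + ChunkSize - 1, LastIteration);
  Cursor = UB + 1;
  return true;
}
} // namespace

int main() {
  dispatch_init(/*UB=*/9, /*Chunk=*/4);
  int32_t LB, UB;
  // Outer loop (EmitOMPOuterLoop): request chunks [LB..UB] from the runtime.
  while (dispatch_next(LB, UB))
    // Inner loop (EmitOMPInnerLoop): while (idx <= UB) { BODY; ++idx; }
    for (int32_t IV = LB; IV <= UB; ++IV)
      std::printf("iteration %d\n", static_cast<int>(IV));
}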
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index d1a706b8821..874b6a69e51 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2466,14 +2466,16 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule, return Schedule | Modifier; } -void CGOpenMPRuntime::emitForDispatchInit( - CodeGenFunction &CGF, SourceLocation Loc, - const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, - bool Ordered, const DispatchRTInput &DispatchValues) { +void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF, + SourceLocation Loc, + const OpenMPScheduleTy &ScheduleKind, + unsigned IVSize, bool IVSigned, + bool Ordered, llvm::Value *UB, + llvm::Value *Chunk) { if (!CGF.HaveInsertPoint()) return; - OpenMPSchedType Schedule = getRuntimeSchedule( - ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); + OpenMPSchedType Schedule = + getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered); assert(Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && @@ -2484,14 +2486,14 @@ void CGOpenMPRuntime::emitForDispatchInit( // kmp_int[32|64] stride, kmp_int[32|64] chunk); // If the Chunk was not specified in the clause - use default value 1. - llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk - : CGF.Builder.getIntN(IVSize, 1); + if (Chunk == nullptr) + Chunk = CGF.Builder.getIntN(IVSize, 1); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.getInt32(addMonoNonMonoModifier( Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type - DispatchValues.LB, // Lower - DispatchValues.UB, // Upper + CGF.Builder.getIntN(IVSize, 0), // Lower + UB, // Upper CGF.Builder.getIntN(IVSize, 1), // Stride Chunk // Chunk }; diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 6f460f12179..7901a6b7a8f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -672,50 +672,16 @@ public: /// virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const; - /// struct with the values to be passed to the dispatch runtime function - struct DispatchRTInput { - /// Loop lower bound - llvm::Value *LB = nullptr; - /// Loop upper bound - llvm::Value *UB = nullptr; - /// Chunk size specified using 'schedule' clause (nullptr if chunk - /// was not specified) - llvm::Value *Chunk = nullptr; - DispatchRTInput() = default; - DispatchRTInput(llvm::Value *LB, llvm::Value *UB, llvm::Value *Chunk) - : LB(LB), UB(UB), Chunk(Chunk) {} - }; - - /// Call the appropriate runtime routine to initialize it before start - /// of loop. - - /// This is used for non static scheduled types and when the ordered - /// clause is present on the loop construct. - /// Depending on the loop schedule, it is necessary to call some runtime - /// routine before start of the OpenMP loop to get the loop upper / lower - /// bounds \a LB and \a UB and stride \a ST. - /// - /// \param CGF Reference to current CodeGenFunction. - /// \param Loc Clang source location. - /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause. - /// \param IVSize Size of the iteration variable in bits. - /// \param IVSigned Sign of the interation variable. - /// \param Ordered true if loop is ordered, false otherwise. 
- /// \param DispatchValues struct containing llvm values for lower bound, upper - /// bound, and chunk expression. - /// For the default (nullptr) value, the chunk 1 will be used. - /// virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, - const DispatchRTInput &DispatchValues); + llvm::Value *UB, + llvm::Value *Chunk = nullptr); /// \brief Call the appropriate runtime routine to initialize it before start /// of loop. /// - /// This is used only in case of static schedule, when the user did not - /// specify a ordered clause on the loop construct. - /// Depending on the loop schedule, it is necessary to call some runtime + /// Depending on the loop schedule, it is nesessary to call some runtime /// routine before start of the OpenMP loop to get the loop upper / lower /// bounds \a LB and \a UB and stride \a ST. /// diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index f738dd0750f..22269e42c7a 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -87,8 +87,7 @@ public: class OMPParallelScope final : public OMPLexicalScope { bool EmitPreInitStmt(const OMPExecutableDirective &S) { OpenMPDirectiveKind Kind = S.getDirectiveKind(); - return !(isOpenMPTargetExecutionDirective(Kind) || - isOpenMPLoopBoundSharingDirective(Kind)) && + return !isOpenMPTargetExecutionDirective(Kind) && isOpenMPParallelDirective(Kind); } @@ -1250,20 +1249,10 @@ static void emitPostUpdateForReductionClause( CGF.EmitBlock(DoneBB, /*IsFinished=*/true); } -namespace { -/// Codegen lambda for appending distribute lower and upper bounds to outlined -/// parallel function. This is necessary for combined constructs such as -/// 'distribute parallel for' -typedef llvm::function_ref<void(CodeGenFunction &, - const OMPExecutableDirective &, - llvm::SmallVectorImpl<llvm::Value *> &)> - CodeGenBoundParametersTy; -} // anonymous namespace - -static void emitCommonOMPParallelDirective( - CodeGenFunction &CGF, const OMPExecutableDirective &S, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, - const CodeGenBoundParametersTy &CodeGenBoundParameters) { +static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, + const OMPExecutableDirective &S, + OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); @@ -1290,20 +1279,11 @@ static void emitCommonOMPParallelDirective( OMPParallelScope Scope(CGF, S); llvm::SmallVector<llvm::Value *, 16> CapturedVars; - // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk - // lower and upper bounds with the pragma 'for' chunking mechanism. - // The following lambda takes care of appending the lower and upper bound - // parameters when necessary - CodeGenBoundParameters(CGF, S, CapturedVars); CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, CapturedVars, IfCond); } -static void emitEmptyBoundParameters(CodeGenFunction &, - const OMPExecutableDirective &, - llvm::SmallVectorImpl<llvm::Value *> &) {} - void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // Emit parallel region as a standalone region. 
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { @@ -1324,8 +1304,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, - emitEmptyBoundParameters); + emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen); emitPostUpdateForReductionClause( *this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } @@ -1670,13 +1649,6 @@ void CodeGenFunction::EmitOMPSimdFinal( EmitBlock(DoneBB, /*IsFinished=*/true); } -static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, - const OMPLoopDirective &S, - CodeGenFunction::JumpDest LoopExit) { - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); -}; - void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPLoopScope PreInitScope(CGF, S); @@ -1759,12 +1731,9 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } -void CodeGenFunction::EmitOMPOuterLoop( - bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, - CodeGenFunction::OMPPrivateScope &LoopScope, - const CodeGenFunction::OMPLoopArguments &LoopArgs, - const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, - const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { +void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); const Expr *IVExpr = S.getIterationVariable(); @@ -1782,18 +1751,15 @@ void CodeGenFunction::EmitOMPOuterLoop( llvm::Value *BoolCondVal = nullptr; if (!DynamicOrOrdered) { - // UB = min(UB, GlobalUB) or - // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. - // 'distribute parallel for') - EmitIgnoredExpr(LoopArgs.EUB); + // UB = min(UB, GlobalUB) + EmitIgnoredExpr(S.getEnsureUpperBound()); // IV = LB - EmitIgnoredExpr(LoopArgs.Init); + EmitIgnoredExpr(S.getInit()); // IV < UB - BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond); + BoolCondVal = EvaluateExprAsBool(S.getCond()); } else { - BoolCondVal = - RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL, - LoopArgs.LB, LoopArgs.UB, LoopArgs.ST); + BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, + LB, UB, ST); } // If there are any cleanups between here and the loop-exit scope, @@ -1813,7 +1779,7 @@ void CodeGenFunction::EmitOMPOuterLoop( // Emit "IV = LB" (in case of static schedule, we have already calculated new // LB for loop condition and emitted it above). if (DynamicOrOrdered) - EmitIgnoredExpr(LoopArgs.Init); + EmitIgnoredExpr(S.getInit()); // Create a block for the increment. auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); @@ -1827,27 +1793,24 @@ void CodeGenFunction::EmitOMPOuterLoop( EmitOMPSimdInit(S, IsMonotonic); SourceLocation Loc = S.getLocStart(); - - // when 'distribute' is not combined with a 'for': - // while (idx <= UB) { BODY; ++idx; } - // when 'distribute' is combined with a 'for' - // (e.g. 
'distribute parallel for') - // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } - EmitOMPInnerLoop( - S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, - [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { - CodeGenLoop(CGF, S, LoopExit); - }, - [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { - CodeGenOrdered(CGF, Loc, IVSize, IVSigned); - }); + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), + [&S, LoopExit](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); + }, + [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) { + if (Ordered) { + CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd( + CGF, Loc, IVSize, IVSigned); + } + }); EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); if (!DynamicOrOrdered) { // Emit "LB = LB + Stride", "UB = UB + Stride". - EmitIgnoredExpr(LoopArgs.NextLB); - EmitIgnoredExpr(LoopArgs.NextUB); + EmitIgnoredExpr(S.getNextLowerBound()); + EmitIgnoredExpr(S.getNextUpperBound()); } EmitBranch(CondBlock); @@ -1866,8 +1829,7 @@ void CodeGenFunction::EmitOMPOuterLoop( void CodeGenFunction::EmitOMPForOuterLoop( const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, - const OMPLoopArguments &LoopArgs, - const CodeGenDispatchBoundsTy &CGDispatchBounds) { + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). @@ -1876,7 +1838,7 @@ void CodeGenFunction::EmitOMPForOuterLoop( assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, - LoopArgs.Chunk != nullptr)) && + /*Chunked=*/Chunk != nullptr)) && "static non-chunked schedule does not need outer loop"); // Emit outer loop. 
@@ -1934,46 +1896,22 @@ void CodeGenFunction::EmitOMPForOuterLoop( const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); if (DynamicOrOrdered) { - auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); - llvm::Value *LBVal = DispatchBounds.first; - llvm::Value *UBVal = DispatchBounds.second; - CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, - LoopArgs.Chunk}; + llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize, - IVSigned, Ordered, DipatchRTInputValues); + IVSigned, Ordered, UBVal, Chunk); } else { RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, - Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, - LoopArgs.ST, LoopArgs.Chunk); + Ordered, IL, LB, UB, ST, Chunk); } - auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, - const unsigned IVSize, - const bool IVSigned) { - if (Ordered) { - CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, - IVSigned); - } - }; - - OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, - LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); - OuterLoopArgs.IncExpr = S.getInc(); - OuterLoopArgs.Init = S.getInit(); - OuterLoopArgs.Cond = S.getCond(); - OuterLoopArgs.NextLB = S.getNextLowerBound(); - OuterLoopArgs.NextUB = S.getNextUpperBound(); - EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs, - emitOMPLoopBodyWithStopPoint, CodeGenOrdered); + EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB, + ST, IL, Chunk); } -static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, - const unsigned IVSize, const bool IVSigned) {} - void CodeGenFunction::EmitOMPDistributeOuterLoop( - OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, - const CodeGenLoopTy &CodeGenLoopContent) { + OpenMPDistScheduleClauseKind ScheduleKind, + const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); @@ -1986,159 +1924,26 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop( const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, - IVSigned, /* Ordered = */ false, LoopArgs.IL, - LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, - LoopArgs.Chunk); + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, + IVSize, IVSigned, /* Ordered = */ false, + IL, LB, UB, ST, Chunk); - // for combined 'distribute' and 'for' the increment expression of distribute - // is store in DistInc. For 'distribute' alone, it is in Inc. - Expr *IncExpr; - if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) - IncExpr = S.getDistInc(); - else - IncExpr = S.getInc(); - - // this routine is shared by 'omp distribute parallel for' and - // 'omp distribute': select the right EUB expression depending on the - // directive - OMPLoopArguments OuterLoopArgs; - OuterLoopArgs.LB = LoopArgs.LB; - OuterLoopArgs.UB = LoopArgs.UB; - OuterLoopArgs.ST = LoopArgs.ST; - OuterLoopArgs.IL = LoopArgs.IL; - OuterLoopArgs.Chunk = LoopArgs.Chunk; - OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? 
S.getCombinedEnsureUpperBound() - : S.getEnsureUpperBound(); - OuterLoopArgs.IncExpr = IncExpr; - OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedInit() - : S.getInit(); - OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedCond() - : S.getCond(); - OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedNextLowerBound() - : S.getNextLowerBound(); - OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedNextUpperBound() - : S.getNextUpperBound(); - - EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, - LoopScope, OuterLoopArgs, CodeGenLoopContent, - emitEmptyOrdered); -} - -/// Emit a helper variable and return corresponding lvalue. -static LValue EmitOMPHelperVar(CodeGenFunction &CGF, - const DeclRefExpr *Helper) { - auto VDecl = cast<VarDecl>(Helper->getDecl()); - CGF.EmitVarDecl(*VDecl); - return CGF.EmitLValue(Helper); -} - -static std::pair<LValue, LValue> -emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, - const OMPExecutableDirective &S) { - const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); - LValue LB = - EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); - - // When composing 'distribute' with 'for' (e.g. as in 'distribute - // parallel for') we need to use the 'distribute' - // chunk lower and upper bounds rather than the whole loop iteration - // space. These are parameters to the outlined function for 'parallel' - // and we copy the bounds of the previous schedule into the - // the current ones. - LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable()); - LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable()); - llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation()); - PrevLBVal = CGF.EmitScalarConversion( - PrevLBVal, LS.getPrevLowerBoundVariable()->getType(), - LS.getIterationVariable()->getType(), SourceLocation()); - llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation()); - PrevUBVal = CGF.EmitScalarConversion( - PrevUBVal, LS.getPrevUpperBoundVariable()->getType(), - LS.getIterationVariable()->getType(), SourceLocation()); - - CGF.EmitStoreOfScalar(PrevLBVal, LB); - CGF.EmitStoreOfScalar(PrevUBVal, UB); - - return {LB, UB}; -} - -/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then -/// we need to use the LB and UB expressions generated by the worksharing -/// code generation support, whereas in non combined situations we would -/// just emit 0 and the LastIteration expression -/// This function is necessary due to the difference of the LB and UB -/// types for the RT emission routines for 'for_static_init' and -/// 'for_dispatch_init' -static std::pair<llvm::Value *, llvm::Value *> -emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, - const OMPExecutableDirective &S, - Address LB, Address UB) { - const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); - const Expr *IVExpr = LS.getIterationVariable(); - // when implementing a dynamic schedule for a 'for' combined with a - // 'distribute' (e.g. 
'distribute parallel for'), the 'for' loop - // is not normalized as each team only executes its own assigned - // distribute chunk - QualType IteratorTy = IVExpr->getType(); - llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy, - SourceLocation()); - llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy, - SourceLocation()); - return {LBVal, UBVal}; -}; - -static void emitDistributeParallelForDistributeInnerBoundParams( - CodeGenFunction &CGF, const OMPExecutableDirective &S, - llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { - const auto &Dir = cast<OMPLoopDirective>(S); - LValue LB = - CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable())); - auto LBCast = CGF.Builder.CreateIntCast( - CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false); - CapturedVars.push_back(LBCast); - LValue UB = - CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable())); - - auto UBCast = CGF.Builder.CreateIntCast( - CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false); - CapturedVars.push_back(UBCast); -}; - -static void -emitInnerParallelForWhenCombined(CodeGenFunction &CGF, - const OMPLoopDirective &S, - CodeGenFunction::JumpDest LoopExit) { - auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, - PrePostActionTy &) { - CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(), - emitDistributeParallelForInnerBounds, - emitDistributeParallelForDispatchBounds); - }; - - emitCommonOMPParallelDirective( - CGF, S, OMPD_for, CGInlinedWorksharingLoop, - emitDistributeParallelForDistributeInnerBoundParams); + EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, + S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk); } void CodeGenFunction::EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S) { - auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, - S.getDistInc()); - }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); - OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for, - /*HasCancel=*/false); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, - /*HasCancel=*/false); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, OMPD_distribute_parallel_for, + [&S](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for, + /*HasCancel=*/false); + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); } void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( @@ -2276,6 +2081,14 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( }); } +/// \brief Emit a helper variable and return corresponding lvalue. +static LValue EmitOMPHelperVar(CodeGenFunction &CGF, + const DeclRefExpr *Helper) { + auto VDecl = cast<VarDecl>(Helper->getDecl()); + CGF.EmitVarDecl(*VDecl); + return CGF.EmitLValue(Helper); +} + namespace { struct ScheduleKindModifiersTy { OpenMPScheduleClauseKind Kind; @@ -2288,10 +2101,7 @@ namespace { }; } // namespace -bool CodeGenFunction::EmitOMPWorksharingLoop( - const OMPLoopDirective &S, Expr *EUB, - const CodeGenLoopBoundsTy &CodeGenLoopBounds, - const CodeGenDispatchBoundsTy &CGDispatchBounds) { +bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // Emit the loop iteration variable. 
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); @@ -2341,10 +2151,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( emitAlignedClause(*this, S); EmitOMPLinearClauseInit(S); // Emit helper vars inits. - - std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); - LValue LB = Bounds.first; - LValue UB = Bounds.second; + LValue LB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); LValue ST = EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); LValue IL = @@ -2430,11 +2240,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. - const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(), - ST.getAddress(), IL.getAddress(), - Chunk, EUB); EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, - LoopArguments, CGDispatchBounds); + LB.getAddress(), UB.getAddress(), ST.getAddress(), + IL.getAddress(), Chunk); } if (isOpenMPSimdDirective(S.getDirectiveKind())) { EmitOMPSimdFinal(S, @@ -2472,42 +2280,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( return HasLastprivateClause; } -/// The following two functions generate expressions for the loop lower -/// and upper bounds in case of static and dynamic (dispatch) schedule -/// of the associated 'for' or 'distribute' loop. -static std::pair<LValue, LValue> -emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { - const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); - LValue LB = - EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable())); - LValue UB = - EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable())); - return {LB, UB}; -} - -/// When dealing with dispatch schedules (e.g. 
dynamic, guided) we do not -/// consider the lower and upper bound expressions generated by the -/// worksharing loop support, but we use 0 and the iteration space size as -/// constants -static std::pair<llvm::Value *, llvm::Value *> -emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, - Address LB, Address UB) { - const OMPLoopDirective &LS = cast<OMPLoopDirective>(S); - const Expr *IVExpr = LS.getIterationVariable(); - const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType()); - llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); - llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); - return {LBVal, UBVal}; -} - void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); - HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), - emitForLoopBounds, - emitDispatchForLoopBounds); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S); }; { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2525,9 +2303,7 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), - emitForLoopBounds, - emitDispatchForLoopBounds); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S); }; { OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); @@ -2778,11 +2554,9 @@ void CodeGenFunction::EmitOMPParallelForDirective( // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); - CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, - emitDispatchForLoopBounds); + CGF.EmitOMPWorksharingLoop(S); }; - emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, - emitEmptyBoundParameters); + emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); } void CodeGenFunction::EmitOMPParallelForSimdDirective( @@ -2790,11 +2564,9 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. 
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, - emitDispatchForLoopBounds); + CGF.EmitOMPWorksharingLoop(S); }; - emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, - emitEmptyBoundParameters); + emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); } void CodeGenFunction::EmitOMPParallelSectionsDirective( @@ -2804,8 +2576,7 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitSections(S); }; - emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, - emitEmptyBoundParameters); + emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); } void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, @@ -3023,9 +2794,7 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { }(), S.getLocStart()); } -void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, - const CodeGenLoopTy &CodeGenLoop, - Expr *IncExpr) { +void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { // Emit the loop iteration variable. auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); @@ -3066,17 +2835,10 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, // Emit 'then' code. { // Emit helper vars inits. - - LValue LB = EmitOMPHelperVar( - *this, cast<DeclRefExpr>( - (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedLowerBoundVariable() - : S.getLowerBoundVariable()))); - LValue UB = EmitOMPHelperVar( - *this, cast<DeclRefExpr>( - (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedUpperBoundVariable() - : S.getUpperBoundVariable()))); + LValue LB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); LValue ST = EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); LValue IL = @@ -3128,25 +2890,15 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, auto LoopExit = getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); // UB = min(UB, GlobalUB); - EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedEnsureUpperBound() - : S.getEnsureUpperBound()); + EmitIgnoredExpr(S.getEnsureUpperBound()); // IV = LB; - EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedInit() - : S.getInit()); - - Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) - ? S.getCombinedCond() - : S.getCond(); - - // for distribute alone, codegen + EmitIgnoredExpr(S.getInit()); // while (idx <= UB) { BODY; ++idx; } - // when combined with 'for' (e.g. 
as in 'distribute parallel for') - // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } - EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr, - [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { - CodeGenLoop(CGF, S, LoopExit); + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), + S.getInc(), + [&S, LoopExit](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); }, [](CodeGenFunction &) {}); EmitBlock(LoopExit.getBlock()); @@ -3155,11 +2907,9 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, } else { // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. - const OMPLoopArguments LoopArguments = { - LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), - Chunk}; - EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments, - CodeGenLoop); + EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, + LB.getAddress(), UB.getAddress(), ST.getAddress(), + IL.getAddress(), Chunk); } // Emit final copy of the lastprivate variables if IsLastIter != 0. @@ -3181,8 +2931,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { - - CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc()); + CGF.EmitOMPDistributeLoop(S); }; OMPLexicalScope Scope(*this, S, /*AsInlined=*/true); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, @@ -4091,8 +3840,7 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF, CGF.EmitStmt(CS->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, - emitEmptyBoundParameters); + emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen); emitPostUpdateForReductionClause( CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 1ded824ba5b..fa72019eb08 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -175,25 +175,6 @@ public: // because of jumps. VarBypassDetector Bypasses; - // CodeGen lambda for loops and support for ordered clause - typedef llvm::function_ref<void(CodeGenFunction &, const OMPLoopDirective &, - JumpDest)> - CodeGenLoopTy; - typedef llvm::function_ref<void(CodeGenFunction &, SourceLocation, - const unsigned, const bool)> - CodeGenOrderedTy; - - // Codegen lambda for loop bounds in worksharing loop constructs - typedef llvm::function_ref<std::pair<LValue, LValue>( - CodeGenFunction &, const OMPExecutableDirective &S)> - CodeGenLoopBoundsTy; - - // Codegen lambda for loop bounds in dispatch-based loop implementation - typedef llvm::function_ref<std::pair<llvm::Value *, llvm::Value *>( - CodeGenFunction &, const OMPExecutableDirective &S, Address LB, - Address UB)> - CodeGenDispatchBoundsTy; - /// \brief CGBuilder insert helper. This function is called after an /// instruction is created using Builder. 
void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, @@ -2775,6 +2756,7 @@ public: void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); + void EmitOMPDistributeLoop(const OMPDistributeDirective &S); void EmitOMPDistributeParallelForDirective( const OMPDistributeParallelForDirective &S); void EmitOMPDistributeParallelForSimdDirective( @@ -2831,78 +2813,32 @@ public: void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S, OMPPrivateScope &LoopScope); - /// Helper for the OpenMP loop directives. - void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); - - /// \brief Emit code for the worksharing loop-based directive. - /// \return true, if this construct has any lastprivate clause, false - - /// otherwise. - bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, Expr *EUB, - const CodeGenLoopBoundsTy &CodeGenLoopBounds, - const CodeGenDispatchBoundsTy &CGDispatchBounds); - private: /// Helpers for blocks llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info); /// Helpers for the OpenMP loop directives. + void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit); void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false); void EmitOMPSimdFinal( const OMPLoopDirective &D, const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen); - - void EmitOMPDistributeLoop(const OMPLoopDirective &S, - const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr); - - /// struct with the values to be passed to the OpenMP loop-related functions - struct OMPLoopArguments { - /// loop lower bound - Address LB = Address::invalid(); - /// loop upper bound - Address UB = Address::invalid(); - /// loop stride - Address ST = Address::invalid(); - /// isLastIteration argument for runtime functions - Address IL = Address::invalid(); - /// Chunk value generated by sema - llvm::Value *Chunk = nullptr; - /// EnsureUpperBound - Expr *EUB = nullptr; - /// IncrementExpression - Expr *IncExpr = nullptr; - /// Loop initialization - Expr *Init = nullptr; - /// Loop exit condition - Expr *Cond = nullptr; - /// Update of LB after a whole chunk has been executed - Expr *NextLB = nullptr; - /// Update of UB after a whole chunk has been executed - Expr *NextUB = nullptr; - OMPLoopArguments() = default; - OMPLoopArguments(Address LB, Address UB, Address ST, Address IL, - llvm::Value *Chunk = nullptr, Expr *EUB = nullptr, - Expr *IncExpr = nullptr, Expr *Init = nullptr, - Expr *Cond = nullptr, Expr *NextLB = nullptr, - Expr *NextUB = nullptr) - : LB(LB), UB(UB), ST(ST), IL(IL), Chunk(Chunk), EUB(EUB), - IncExpr(IncExpr), Init(Init), Cond(Cond), NextLB(NextLB), - NextUB(NextUB) {} - }; - void EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, - const OMPLoopDirective &S, OMPPrivateScope &LoopScope, - const OMPLoopArguments &LoopArgs, - const CodeGenLoopTy &CodeGenLoop, - const CodeGenOrderedTy &CodeGenOrdered); + /// \brief Emit code for the worksharing loop-based directive. + /// \return true, if this construct has any lastprivate clause, false - + /// otherwise. 
+ bool EmitOMPWorksharingLoop(const OMPLoopDirective &S); + void EmitOMPOuterLoop(bool IsMonotonic, bool DynamicOrOrdered, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); void EmitOMPForOuterLoop(const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, bool Ordered, - const OMPLoopArguments &LoopArgs, - const CodeGenDispatchBoundsTy &CGDispatchBounds); - void EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind, - const OMPLoopDirective &S, - OMPPrivateScope &LoopScope, - const OMPLoopArguments &LoopArgs, - const CodeGenLoopTy &CodeGenLoopContent); + OMPPrivateScope &LoopScope, bool Ordered, Address LB, + Address UB, Address ST, Address IL, + llvm::Value *Chunk); + void EmitOMPDistributeOuterLoop( + OpenMPDistScheduleClauseKind ScheduleKind, + const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk); /// \brief Emit code for sections directive. void EmitSections(const OMPExecutableDirective &S); |
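For context on the CGStmtOpenMP.cpp changes above: the removed helpers (emitDistributeParallelForInnerBounds, emitDistributeParallelForDispatchBounds, emitInnerParallelForWhenCombined, and the bound-parameter callback passed to emitCommonOMPParallelDirective) existed so that, for combined constructs such as 'distribute parallel for', each team's distribute chunk bounds could be handed to the inner worksharing loop. A minimal user-level loop of that shape, purely for illustration (the function name and values are made up, not taken from the patch or its tests; build with an OpenMP-enabled compiler):

#include <cstdio>

// 'distribute' must be closely nested inside a 'teams' region; in the combined
// 'distribute parallel for' form, each team executes one distribute chunk and
// the inner 'parallel for' workshares that chunk among the team's threads.
void saxpy(int n, float a, const float *x, float *y) {
#pragma omp target teams map(to: x[0:n]) map(tofrom: y[0:n])
#pragma omp distribute parallel for
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}

int main() {
  float x[8], y[8];
  for (int i = 0; i < 8; ++i) {
    x[i] = 1.0f;
    y[i] = static_cast<float>(i);
  }
  saxpy(8, 2.0f, x, y);
  std::printf("y[7] = %g\n", y[7]); // 2*1 + 7 = 9
}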