diff options
Diffstat (limited to 'clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 297 |
1 files changed, 228 insertions, 69 deletions
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 8434cdf2da2..8984aca7b70 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1410,82 +1410,15 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } -void CodeGenFunction::EmitOMPForOuterLoop( - OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic, +void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); - // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). - const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind); - - assert((Ordered || - !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) && - "static non-chunked schedule does not need outer loop"); - - // Emit outer loop. - // - // OpenMP [2.7.1, Loop Construct, Description, table 2-1] - // When schedule(dynamic,chunk_size) is specified, the iterations are - // distributed to threads in the team in chunks as the threads request them. - // Each thread executes a chunk of iterations, then requests another chunk, - // until no chunks remain to be distributed. Each chunk contains chunk_size - // iterations, except for the last chunk to be distributed, which may have - // fewer iterations. When no chunk_size is specified, it defaults to 1. - // - // When schedule(guided,chunk_size) is specified, the iterations are assigned - // to threads in the team in chunks as the executing threads request them. - // Each thread executes a chunk of iterations, then requests another chunk, - // until no chunks remain to be assigned. For a chunk_size of 1, the size of - // each chunk is proportional to the number of unassigned iterations divided - // by the number of threads in the team, decreasing to 1. For a chunk_size - // with value k (greater than 1), the size of each chunk is determined in the - // same way, with the restriction that the chunks do not contain fewer than k - // iterations (except for the last chunk to be assigned, which may have fewer - // than k iterations). - // - // When schedule(auto) is specified, the decision regarding scheduling is - // delegated to the compiler and/or runtime system. The programmer gives the - // implementation the freedom to choose any possible mapping of iterations to - // threads in the team. - // - // When schedule(runtime) is specified, the decision regarding scheduling is - // deferred until run time, and the schedule and chunk size are taken from the - // run-sched-var ICV. If the ICV is set to auto, the schedule is - // implementation defined - // - // while(__kmpc_dispatch_next(&LB, &UB)) { - // idx = LB; - // while (idx <= UB) { BODY; ++idx; - // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. - // } // inner loop - // } - // - // OpenMP [2.7.1, Loop Construct, Description, table 2-1] - // When schedule(static, chunk_size) is specified, iterations are divided into - // chunks of size chunk_size, and the chunks are assigned to the threads in - // the team in a round-robin fashion in the order of the thread number. - // - // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { - // while (idx <= UB) { BODY; ++idx; } // inner loop - // LB = LB + ST; - // UB = UB + ST; - // } - // - const Expr *IVExpr = S.getIterationVariable(); const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - if (DynamicOrOrdered) { - llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); - RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, Ordered, UBVal, Chunk); - } else { - RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, - IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk); - } - auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); // Start the loop with a block that tests the condition. @@ -1565,6 +1498,111 @@ void CodeGenFunction::EmitOMPForOuterLoop( // Tell the runtime we are done. if (!DynamicOrOrdered) RT.emitForStaticFinish(*this, S.getLocEnd()); + +} + +void CodeGenFunction::EmitOMPForOuterLoop( + OpenMPScheduleClauseKind ScheduleKind, bool IsMonotonic, + const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + auto &RT = CGM.getOpenMPRuntime(); + + // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). + const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind); + + assert((Ordered || + !RT.isStaticNonchunked(ScheduleKind, /*Chunked=*/Chunk != nullptr)) && + "static non-chunked schedule does not need outer loop"); + + // Emit outer loop. + // + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When schedule(dynamic,chunk_size) is specified, the iterations are + // distributed to threads in the team in chunks as the threads request them. + // Each thread executes a chunk of iterations, then requests another chunk, + // until no chunks remain to be distributed. Each chunk contains chunk_size + // iterations, except for the last chunk to be distributed, which may have + // fewer iterations. When no chunk_size is specified, it defaults to 1. + // + // When schedule(guided,chunk_size) is specified, the iterations are assigned + // to threads in the team in chunks as the executing threads request them. + // Each thread executes a chunk of iterations, then requests another chunk, + // until no chunks remain to be assigned. For a chunk_size of 1, the size of + // each chunk is proportional to the number of unassigned iterations divided + // by the number of threads in the team, decreasing to 1. For a chunk_size + // with value k (greater than 1), the size of each chunk is determined in the + // same way, with the restriction that the chunks do not contain fewer than k + // iterations (except for the last chunk to be assigned, which may have fewer + // than k iterations). + // + // When schedule(auto) is specified, the decision regarding scheduling is + // delegated to the compiler and/or runtime system. The programmer gives the + // implementation the freedom to choose any possible mapping of iterations to + // threads in the team. + // + // When schedule(runtime) is specified, the decision regarding scheduling is + // deferred until run time, and the schedule and chunk size are taken from the + // run-sched-var ICV. If the ICV is set to auto, the schedule is + // implementation defined + // + // while(__kmpc_dispatch_next(&LB, &UB)) { + // idx = LB; + // while (idx <= UB) { BODY; ++idx; + // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. + // } // inner loop + // } + // + // OpenMP [2.7.1, Loop Construct, Description, table 2-1] + // When schedule(static, chunk_size) is specified, iterations are divided into + // chunks of size chunk_size, and the chunks are assigned to the threads in + // the team in a round-robin fashion in the order of the thread number. + // + // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { + // while (idx <= UB) { BODY; ++idx; } // inner loop + // LB = LB + ST; + // UB = UB + ST; + // } + // + + const Expr *IVExpr = S.getIterationVariable(); + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + + if (DynamicOrOrdered) { + llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration()); + RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, + IVSize, IVSigned, Ordered, UBVal, Chunk); + } else { + RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, + Ordered, IL, LB, UB, ST, Chunk); + } + + EmitOMPOuterLoop(IsMonotonic, DynamicOrOrdered, S, LoopScope, Ordered, LB, UB, + ST, IL, Chunk); +} + +void CodeGenFunction::EmitOMPDistributeOuterLoop( + OpenMPDistScheduleClauseKind ScheduleKind, + const OMPDistributeDirective &S, OMPPrivateScope &LoopScope, + Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) { + + auto &RT = CGM.getOpenMPRuntime(); + + // Emit outer loop. + // Same behavior as a OMPForOuterLoop, except that schedule cannot be + // dynamic + // + + const Expr *IVExpr = S.getIterationVariable(); + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, + IVSize, IVSigned, /* Ordered = */ false, + IL, LB, UB, ST, Chunk); + + EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, + S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk); } /// \brief Emit a helper variable and return corresponding lvalue. @@ -2191,9 +2229,130 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { }(), S.getLocStart()); } +void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { + // Emit the loop iteration variable. + auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); + auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); + EmitVarDecl(*IVDecl); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on each + // iteration (e.g., it is foldable into a constant). + if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); + // Emit calculation of the iterations count. + EmitIgnoredExpr(S.getCalcLastIteration()); + } + + auto &RT = CGM.getOpenMPRuntime(); + + // Check pre-condition. + { + // Skip the entire loop if we don't meet the precondition. + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + auto *ThenBlock = createBasicBlock("omp.precond.then"); + ContBlock = createBasicBlock("omp.precond.end"); + emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock, + getProfileCount(&S)); + EmitBlock(ThenBlock); + incrementProfileCounter(&S); + } + + // Emit 'then' code. + { + // Emit helper vars inits. + LValue LB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); + LValue UB = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); + LValue ST = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); + LValue IL = + EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); + + OMPPrivateScope LoopScope(*this); + emitPrivateLoopCounters(*this, LoopScope, S.counters(), + S.private_counters()); + (void)LoopScope.Privatize(); + + // Detect the distribute schedule kind and chunk. + llvm::Value *Chunk = nullptr; + OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; + if (auto *C = S.getSingleClause<OMPDistScheduleClause>()) { + ScheduleKind = C->getDistScheduleKind(); + if (const auto *Ch = C->getChunkSize()) { + Chunk = EmitScalarExpr(Ch); + Chunk = EmitScalarConversion(Chunk, Ch->getType(), + S.getIterationVariable()->getType(), + S.getLocStart()); + } + } + const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); + const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); + + // OpenMP [2.10.8, distribute Construct, Description] + // If dist_schedule is specified, kind must be static. If specified, + // iterations are divided into chunks of size chunk_size, chunks are + // assigned to the teams of the league in a round-robin fashion in the + // order of the team number. When no chunk_size is specified, the + // iteration space is divided into chunks that are approximately equal + // in size, and at most one chunk is distributed to each team of the + // league. The size of the chunks is unspecified in this case. + if (RT.isStaticNonchunked(ScheduleKind, + /* Chunked */ Chunk != nullptr)) { + RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, + IVSize, IVSigned, /* Ordered = */ false, + IL.getAddress(), LB.getAddress(), + UB.getAddress(), ST.getAddress()); + auto LoopExit = + getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); + // UB = min(UB, GlobalUB); + EmitIgnoredExpr(S.getEnsureUpperBound()); + // IV = LB; + EmitIgnoredExpr(S.getInit()); + // while (idx <= UB) { BODY; ++idx; } + EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), + S.getInc(), + [&S, LoopExit](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, LoopExit); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + EmitBlock(LoopExit.getBlock()); + // Tell the runtime we are done. + RT.emitForStaticFinish(*this, S.getLocStart()); + } else { + // Emit the outer loop, which requests its work chunk [LB..UB] from + // runtime and runs the inner loop to process it. + EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, + LB.getAddress(), UB.getAddress(), ST.getAddress(), + IL.getAddress(), Chunk); + } + } + + // We're now done with the loop, so jump to the continuation block. + if (ContBlock) { + EmitBranch(ContBlock); + EmitBlock(ContBlock, true); + } + } +} + void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { - llvm_unreachable("CodeGen for 'omp distribute' is not supported yet."); + LexicalScope Scope(*this, S.getSourceRange()); + auto &&CodeGen = [&S](CodeGenFunction &CGF) { + CGF.EmitOMPDistributeLoop(S); + }; + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, + false); } static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, |