Diffstat (limited to 'clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 116 |
1 file changed, 88 insertions, 28 deletions
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index c8937956f45..718b8c326dc 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2006,7 +2006,7 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
 
   // for combined 'distribute' and 'for' the increment expression of distribute
-  // is store in DistInc. For 'distribute' alone, it is in Inc.
+  // is stored in DistInc. For 'distribute' alone, it is in Inc.
   Expr *IncExpr;
   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
     IncExpr = S.getDistInc();
@@ -2298,22 +2298,28 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
       (void)LoopScope.Privatize();
 
       // Detect the loop schedule kind and chunk.
-      llvm::Value *Chunk = nullptr;
+      const Expr *ChunkExpr = nullptr;
       OpenMPScheduleTy ScheduleKind;
       if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
         ScheduleKind.Schedule = C->getScheduleKind();
         ScheduleKind.M1 = C->getFirstScheduleModifier();
         ScheduleKind.M2 = C->getSecondScheduleModifier();
-        if (const Expr *Ch = C->getChunkSize()) {
-          Chunk = EmitScalarExpr(Ch);
-          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
-                                       S.getIterationVariable()->getType(),
-                                       S.getBeginLoc());
-        }
+        ChunkExpr = C->getChunkSize();
       } else {
         // Default behaviour for schedule clause.
         CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
-            *this, S, ScheduleKind.Schedule, Chunk);
+            *this, S, ScheduleKind.Schedule, ChunkExpr);
+      }
+      bool HasChunkSizeOne = false;
+      llvm::Value *Chunk = nullptr;
+      if (ChunkExpr) {
+        Chunk = EmitScalarExpr(ChunkExpr);
+        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
+                                     S.getIterationVariable()->getType(),
+                                     S.getBeginLoc());
+        llvm::APSInt EvaluatedChunk;
+        if (ChunkExpr->EvaluateAsInt(EvaluatedChunk, getContext()))
+          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
       }
       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
@@ -2321,8 +2327,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
       // If the static schedule kind is specified or if the ordered clause is
      // specified, and if no monotonic modifier is specified, the effect will
      // be as if the monotonic modifier was specified.
-      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
-                                /* Chunked */ Chunk != nullptr) &&
+      bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
+          /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
+          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
+      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
+          /* Chunked */ Chunk != nullptr) ||
+          StaticChunkedOne) &&
           !Ordered) {
         if (isOpenMPSimdDirective(S.getDirectiveKind()))
           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
@@ -2333,23 +2343,38 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
         // unspecified in this case.
         CGOpenMPRuntime::StaticRTInput StaticInit(
             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
-            UB.getAddress(), ST.getAddress());
+            UB.getAddress(), ST.getAddress(),
+            StaticChunkedOne ? Chunk : nullptr);
         RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                              ScheduleKind, StaticInit);
         JumpDest LoopExit =
             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
         // UB = min(UB, GlobalUB);
-        EmitIgnoredExpr(S.getEnsureUpperBound());
+        if (!StaticChunkedOne)
+          EmitIgnoredExpr(S.getEnsureUpperBound());
         // IV = LB;
         EmitIgnoredExpr(S.getInit());
-        // while (idx <= UB) { BODY; ++idx; }
-        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
-                         S.getInc(),
-                         [&S, LoopExit](CodeGenFunction &CGF) {
-                           CGF.EmitOMPLoopBody(S, LoopExit);
-                           CGF.EmitStopPoint(&S);
-                         },
-                         [](CodeGenFunction &) {});
+        // For unchunked static schedule generate:
+        //
+        // while (idx <= UB) {
+        //   BODY;
+        //   ++idx;
+        // }
+        //
+        // For static schedule with chunk one:
+        //
+        // while (IV <= PrevUB) {
+        //   BODY;
+        //   IV += ST;
+        // }
+        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+            StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(),
+            StaticChunkedOne ? S.getDistInc() : S.getInc(),
+            [&S, LoopExit](CodeGenFunction &CGF) {
+              CGF.EmitOMPLoopBody(S, LoopExit);
+              CGF.EmitStopPoint(&S);
+            },
+            [](CodeGenFunction &) {});
         EmitBlock(LoopExit.getBlock());
         // Tell the runtime we are done.
         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
@@ -3345,13 +3370,18 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
       // iteration space is divided into chunks that are approximately equal
       // in size, and at most one chunk is distributed to each team of the
       // league. The size of the chunks is unspecified in this case.
+      bool StaticChunked = RT.isStaticChunked(
+          ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
       if (RT.isStaticNonchunked(ScheduleKind,
-                                /* Chunked */ Chunk != nullptr)) {
+                                /* Chunked */ Chunk != nullptr) ||
+          StaticChunked) {
         if (isOpenMPSimdDirective(S.getDirectiveKind()))
           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
         CGOpenMPRuntime::StaticRTInput StaticInit(
             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
-            LB.getAddress(), UB.getAddress(), ST.getAddress());
+            LB.getAddress(), UB.getAddress(), ST.getAddress(),
+            StaticChunked ? Chunk : nullptr);
         RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                     StaticInit);
         JumpDest LoopExit =
@@ -3370,15 +3400,45 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
                 ? S.getCombinedCond()
                 : S.getCond();
 
-        // for distribute alone, codegen
-        // while (idx <= UB) { BODY; ++idx; }
-        // when combined with 'for' (e.g. as in 'distribute parallel for')
-        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+        if (StaticChunked)
+          Cond = S.getCombinedDistCond();
+
+        // For static unchunked schedules generate:
+        //
+        // 1. For distribute alone, codegen
+        //    while (idx <= UB) {
+        //      BODY;
+        //      ++idx;
+        //    }
+        //
+        // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
+        //    while (idx <= UB) {
+        //      <CodeGen rest of pragma>(LB, UB);
+        //      idx += ST;
+        //    }
+        //
+        // For static chunk one schedule generate:
+        //
+        // while (IV <= GlobalUB) {
+        //   <CodeGen rest of pragma>(LB, UB);
+        //   LB += ST;
+        //   UB += ST;
+        //   UB = min(UB, GlobalUB);
+        //   IV = LB;
+        // }
+        //
         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                            CodeGenLoop(CGF, S, LoopExit);
                          },
-                         [](CodeGenFunction &) {});
+                         [&S, StaticChunked](CodeGenFunction &CGF) {
+                           if (StaticChunked) {
+                             CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
+                             CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
+                             CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+                             CGF.EmitIgnoredExpr(S.getCombinedInit());
+                           }
+                         });
         EmitBlock(LoopExit.getBlock());
         // Tell the runtime we are done.
         RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
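For context, a minimal OpenMP source sketch follows. It is illustrative only: it is not taken from this patch or its tests, and the array and loop names are made up. It shows the kind of construct the new StaticChunkedOne path in EmitOMPWorksharingLoop targets: a combined 'distribute parallel for' with a compile-time chunk size of 1, where each chunk holds exactly one iteration, so the generated inner loop can step the iteration variable by the stride (IV += ST) against the distribute bounds as described in the comments added above. A chunked dist_schedule on the distribute part is the analogous case handled by the StaticChunked path in EmitOMPDistributeLoop.

// Illustrative sketch, not part of the patch; compile with 'clang++ -fopenmp'.
#include <cstdio>

int main() {
  const int N = 128;
  int A[128] = {0};
  // Combined construct with a literal chunk size of 1; this is the case the
  // StaticChunkedOne path in EmitOMPWorksharingLoop is meant to recognize.
#pragma omp target teams distribute parallel for schedule(static, 1) map(tofrom: A)
  for (int i = 0; i < N; ++i)
    A[i] = 2 * i;
  std::printf("A[5] = %d\n", A[5]); // expected: 10
  return 0;
}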