Diffstat (limited to 'clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r-- clang/lib/CodeGen/CGStmtOpenMP.cpp | 116
1 file changed, 88 insertions(+), 28 deletions(-)
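
This change teaches the worksharing and distribute loop codegen to recognize a static schedule whose chunk size constant-folds to 1 on combined, loop-bound-sharing directives (e.g. 'distribute parallel for'). The chunk is now carried around as an expression (the default-schedule hook returns an Expr as well), so a compile-time chunk of 1 can be detected and lowered like the unchunked static schedule: a single inner loop that advances by the stride, with no outer chunk-dispatch loop. As an illustration only (any conforming combined construct whose chunk folds to 1 takes this path; the kernel below is hypothetical):

// Hypothetical kernel; the combined directive and the literal chunk of 1
// are what select the new codegen path.
void saxpy(int N, float A, const float *X, float *Y) {
#pragma omp target teams distribute parallel for schedule(static, 1)
  for (int I = 0; I < N; ++I)
    Y[I] = A * X[I] + Y[I];
}
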
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index c8937956f45..718b8c326dc 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2006,7 +2006,7 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
// for combined 'distribute' and 'for' the increment expression of distribute
- // is store in DistInc. For 'distribute' alone, it is in Inc.
+ // is stored in DistInc. For 'distribute' alone, it is in Inc.
Expr *IncExpr;
if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
IncExpr = S.getDistInc();
@@ -2298,22 +2298,28 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
(void)LoopScope.Privatize();
// Detect the loop schedule kind and chunk.
- llvm::Value *Chunk = nullptr;
+ const Expr *ChunkExpr = nullptr;
OpenMPScheduleTy ScheduleKind;
if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
ScheduleKind.Schedule = C->getScheduleKind();
ScheduleKind.M1 = C->getFirstScheduleModifier();
ScheduleKind.M2 = C->getSecondScheduleModifier();
- if (const Expr *Ch = C->getChunkSize()) {
- Chunk = EmitScalarExpr(Ch);
- Chunk = EmitScalarConversion(Chunk, Ch->getType(),
- S.getIterationVariable()->getType(),
- S.getBeginLoc());
- }
+ ChunkExpr = C->getChunkSize();
} else {
// Default behaviour for schedule clause.
CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
- *this, S, ScheduleKind.Schedule, Chunk);
+ *this, S, ScheduleKind.Schedule, ChunkExpr);
+ }
+ bool HasChunkSizeOne = false;
+ llvm::Value *Chunk = nullptr;
+ if (ChunkExpr) {
+ Chunk = EmitScalarExpr(ChunkExpr);
+ Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
+ S.getIterationVariable()->getType(),
+ S.getBeginLoc());
+ llvm::APSInt EvaluatedChunk;
+ if (ChunkExpr->EvaluateAsInt(EvaluatedChunk, getContext()))
+ HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
}
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
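
The hunk above defers materializing the chunk until after the schedule is chosen, then constant-folds the chunk expression; at this revision Expr::EvaluateAsInt fills in an llvm::APSInt. A minimal sketch of why the getLimitedValue() comparison is insensitive to the width and signedness the folded value ends up with (assumes only LLVM's ADT headers; this is not Clang code):

#include "llvm/ADT/APSInt.h"
#include <cassert>

int main() {
  // A folded chunk of 1 compares equal to 1 via getLimitedValue()
  // whatever bit width or signedness the front end folded it to.
  llvm::APSInt Narrow(llvm::APInt(/*numBits=*/8, /*val=*/1), /*isUnsigned=*/true);
  llvm::APSInt Wide(llvm::APInt(/*numBits=*/64, /*val=*/1), /*isUnsigned=*/false);
  assert(Narrow.getLimitedValue() == 1 && Wide.getLimitedValue() == 1);
  return 0;
}
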
@@ -2321,8 +2327,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
// If the static schedule kind is specified or if the ordered clause is
// specified, and if no monotonic modifier is specified, the effect will
// be as if the monotonic modifier was specified.
- if (RT.isStaticNonchunked(ScheduleKind.Schedule,
- /* Chunked */ Chunk != nullptr) &&
+ bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
+ /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
+ isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
+ if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
+ /* Chunked */ Chunk != nullptr) ||
+ StaticChunkedOne) &&
!Ordered) {
if (isOpenMPSimdDirective(S.getDirectiveKind()))
EmitOMPSimdInit(S, /*IsMonotonic=*/true);
@@ -2333,23 +2343,38 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
// unspecified in this case.
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
- UB.getAddress(), ST.getAddress());
+ UB.getAddress(), ST.getAddress(),
+ StaticChunkedOne ? Chunk : nullptr);
RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
ScheduleKind, StaticInit);
JumpDest LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
// UB = min(UB, GlobalUB);
- EmitIgnoredExpr(S.getEnsureUpperBound());
+ if (!StaticChunkedOne)
+ EmitIgnoredExpr(S.getEnsureUpperBound());
// IV = LB;
EmitIgnoredExpr(S.getInit());
- // while (idx <= UB) { BODY; ++idx; }
- EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
- S.getInc(),
- [&S, LoopExit](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
- },
- [](CodeGenFunction &) {});
+ // For unchunked static schedule generate:
+ //
+ // while (idx <= UB) {
+ // BODY;
+ // ++idx;
+ // }
+ //
+ // For static schedule with chunk one:
+ //
+ // while (IV <= PrevUB) {
+ // BODY;
+ // IV += ST;
+ // }
+ EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+ StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(),
+ StaticChunkedOne ? S.getDistInc() : S.getInc(),
+ [&S, LoopExit](CodeGenFunction &CGF) {
+ CGF.EmitOMPLoopBody(S, LoopExit);
+ CGF.EmitStopPoint(&S);
+ },
+ [](CodeGenFunction &) {});
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
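
The chunk-one loop shape in the comment above works because, under schedule(static, 1), a thread's consecutive size-one chunks are exactly the stride (the thread count) apart, so one strided loop visits all of them. A standalone simulation of that division of iterations (an assumed model of the runtime's static chunk-one schedule, not the IR Clang emits):

#include <cstdio>

// Assumed model: with schedule(static, 1), thread `Tid` of `NThreads`
// starts at LB = Tid and its size-one chunks are ST = NThreads apart,
// so a single loop with increment ST covers every chunk it owns.
static void RunStaticChunkOne(int Tid, int NThreads, int GlobalUB) {
  for (int IV = Tid; IV <= GlobalUB; IV += NThreads)
    std::printf("thread %d runs iteration %d\n", Tid, IV);
}

int main() {
  for (int Tid = 0; Tid < 4; ++Tid)
    RunStaticChunkOne(Tid, /*NThreads=*/4, /*GlobalUB=*/9);
  return 0;
}
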
@@ -3345,13 +3370,18 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
// iteration space is divided into chunks that are approximately equal
// in size, and at most one chunk is distributed to each team of the
// league. The size of the chunks is unspecified in this case.
+ bool StaticChunked = RT.isStaticChunked(
+ ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+ isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
if (RT.isStaticNonchunked(ScheduleKind,
- /* Chunked */ Chunk != nullptr)) {
+ /* Chunked */ Chunk != nullptr) ||
+ StaticChunked) {
if (isOpenMPSimdDirective(S.getDirectiveKind()))
EmitOMPSimdInit(S, /*IsMonotonic=*/true);
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
- LB.getAddress(), UB.getAddress(), ST.getAddress());
+ LB.getAddress(), UB.getAddress(), ST.getAddress(),
+ StaticChunked ? Chunk : nullptr);
RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
StaticInit);
JumpDest LoopExit =
@@ -3370,15 +3400,45 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
? S.getCombinedCond()
: S.getCond();
- // for distribute alone, codegen
- // while (idx <= UB) { BODY; ++idx; }
- // when combined with 'for' (e.g. as in 'distribute parallel for')
- // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+ if (StaticChunked)
+ Cond = S.getCombinedDistCond();
+
+ // For static unchunked schedules generate:
+ //
+ // 1. For distribute alone, codegen
+ // while (idx <= UB) {
+ // BODY;
+ // ++idx;
+ // }
+ //
+ // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
+ // while (idx <= UB) {
+ // <CodeGen rest of pragma>(LB, UB);
+ // idx += ST;
+ // }
+ //
+ // For static chunk one schedule generate:
+ //
+ // while (IV <= GlobalUB) {
+ // <CodeGen rest of pragma>(LB, UB);
+ // LB += ST;
+ // UB += ST;
+ // UB = min(UB, GlobalUB);
+ // IV = LB;
+ // }
+ //
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
[&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
CodeGenLoop(CGF, S, LoopExit);
},
- [](CodeGenFunction &) {});
+ [&S, StaticChunked](CodeGenFunction &CGF) {
+ if (StaticChunked) {
+ CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
+ CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
+ CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+ CGF.EmitIgnoredExpr(S.getCombinedInit());
+ }
+ });
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
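
The new "next iteration" callback passed to EmitOMPInnerLoop realizes the chunk-one distribute pattern documented in the comment above: between chunks, the team's bounds slide forward by the stride and are re-clamped to the global upper bound. A compact, illustrative model of one team's control flow (simplified; the real codegen operates on runtime-provided bounds through the Combined* expressions named in the lambda):

// Illustrative model only: one team's chunk-one distribute loop.
// LB/UB delimit the chunk handed to the inner 'parallel for'; ST is the
// distance to this team's next chunk; GlobalUB bounds the iteration space.
void DistributeChunkOne(int LB, int UB, int ST, int GlobalUB) {
  int IV = LB;                            // S.getCombinedInit()
  while (IV <= GlobalUB) {                // S.getCombinedDistCond()
    // <CodeGen rest of pragma>(LB, UB): inner 'parallel for' over [LB, UB]
    LB += ST;                             // S.getCombinedNextLowerBound()
    UB += ST;                             // S.getCombinedNextUpperBound()
    UB = UB < GlobalUB ? UB : GlobalUB;   // S.getCombinedEnsureUpperBound()
    IV = LB;                              // S.getCombinedInit()
  }
}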