Diffstat (limited to 'clang')
-rw-r--r-- | clang/include/clang/AST/StmtOpenMP.h | 36
-rw-r--r-- | clang/lib/AST/StmtOpenMP.cpp | 12
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 12
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 16
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 9
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 2
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 116
-rw-r--r-- | clang/lib/Sema/SemaOpenMP.cpp | 17
-rw-r--r-- | clang/lib/Serialization/ASTReaderStmt.cpp | 2
-rw-r--r-- | clang/lib/Serialization/ASTWriterStmt.cpp | 2
-rw-r--r-- | clang/test/OpenMP/distribute_parallel_for_codegen.cpp | 220
-rw-r--r-- | clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp | 220
-rw-r--r-- | clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp | 78
13 files changed, 529 insertions, 213 deletions
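
As context for the changes below, a minimal illustrative snippet (not part of the patch; the file and function names are made up, the loop is modeled on the ones in the codegen tests) showing the kind of combined construct whose code generation this commit streamlines when the schedule is static with chunk size 1:

    // sketch.cpp -- illustrative only; build with: clang++ -fopenmp sketch.cpp
    void vadd(float *a, const float *b, const float *c, int n) {
      // With schedule(static, 1) on a combined 'distribute parallel for',
      // codegen can now skip the outer dispatch loop and emit a single loop
      // driven by the new CombinedDistCond/CombinedParForInDistCond exprs.
      #pragma omp target teams distribute parallel for schedule(static, 1)
      for (int i = 0; i < n; ++i)
        a[i] = b[i] + c[i];
    }
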
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 170c78cf6b6..d1eedd62b37 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -392,9 +392,11 @@ class OMPLoopDirective : public OMPExecutableDirective {
CombinedConditionOffset = 25,
CombinedNextLowerBoundOffset = 26,
CombinedNextUpperBoundOffset = 27,
+ CombinedDistConditionOffset = 28,
+ CombinedParForInDistConditionOffset = 29,
// Offset to the end (and start of the following counters/updates/finals
// arrays) for combined distribute loop directives.
- CombinedDistributeEnd = 28,
+ CombinedDistributeEnd = 30,
};
/// Get the counters storage.
@@ -605,6 +607,17 @@ protected:
"expected loop bound sharing directive");
*std::next(child_begin(), CombinedNextUpperBoundOffset) = CombNUB;
}
+ void setCombinedDistCond(Expr *CombDistCond) {
+ assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+ "expected loop bound distribute sharing directive");
+ *std::next(child_begin(), CombinedDistConditionOffset) = CombDistCond;
+ }
+ void setCombinedParForInDistCond(Expr *CombParForInDistCond) {
+ assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+ "expected loop bound distribute sharing directive");
+ *std::next(child_begin(),
+ CombinedParForInDistConditionOffset) = CombParForInDistCond;
+ }
void setCounters(ArrayRef<Expr *> A);
void setPrivateCounters(ArrayRef<Expr *> A);
void setInits(ArrayRef<Expr *> A);
@@ -637,6 +650,13 @@ public:
/// Update of UpperBound for statically scheduled omp loops for
/// outer loop in combined constructs (e.g. 'distribute parallel for')
Expr *NUB;
+ /// Distribute Loop condition used when composing 'omp distribute'
+ /// with 'omp for' in the same construct when schedule is chunked.
+ Expr *DistCond;
+ /// 'omp parallel for' loop condition used when composed with
+ /// 'omp distribute' in the same construct and when schedule is
+ /// chunked and the chunk size is 1.
+ Expr *ParForInDistCond;
};
/// The expressions built for the OpenMP loop CodeGen for the
@@ -754,6 +774,8 @@ public:
DistCombinedFields.Cond = nullptr;
DistCombinedFields.NLB = nullptr;
DistCombinedFields.NUB = nullptr;
+ DistCombinedFields.DistCond = nullptr;
+ DistCombinedFields.ParForInDistCond = nullptr;
}
};
@@ -922,6 +944,18 @@ public:
return const_cast<Expr *>(reinterpret_cast<const Expr *>(
*std::next(child_begin(), CombinedNextUpperBoundOffset)));
}
+ Expr *getCombinedDistCond() const {
+ assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+ "expected loop bound distribute sharing directive");
+ return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+ *std::next(child_begin(), CombinedDistConditionOffset)));
+ }
+ Expr *getCombinedParForInDistCond() const {
+ assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+ "expected loop bound distribute sharing directive");
+ return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+ *std::next(child_begin(), CombinedParForInDistConditionOffset)));
+ }
const Stmt *getBody() const {
// This relies on the loop form already having been checked by Sema.
const Stmt *Body =
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 1258af7a2d3..85a2daa0801 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -1079,6 +1079,8 @@ OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create(
Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+ Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+ Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
Dir->HasCancel = HasCancel;
return Dir;
}
@@ -1145,6 +1147,8 @@ OMPDistributeParallelForSimdDirective::Create(
Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+ Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+ Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
return Dir;
}
@@ -1457,6 +1461,8 @@ OMPTeamsDistributeParallelForSimdDirective::Create(
Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+ Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+ Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
return Dir;
}
@@ -1524,6 +1530,8 @@ OMPTeamsDistributeParallelForDirective::Create(
Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+ Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+ Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
Dir->HasCancel = HasCancel;
return Dir;
}
@@ -1670,6 +1678,8 @@ OMPTargetTeamsDistributeParallelForDirective::Create(
Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+ Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+ Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
Dir->HasCancel = HasCancel;
return Dir;
}
@@ -1741,6 +1751,8 @@ OMPTargetTeamsDistributeParallelForSimdDirective::Create(
Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+ Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+ Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
return Dir;
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index cd3bc3a8606..47828d189c8 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3292,6 +3292,18 @@ bool CGOpenMPRuntime::isStaticNonchunked(
return Schedule == OMP_dist_sch_static;
}
+bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) const {
+ OpenMPSchedType Schedule =
+ getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
+ return Schedule == OMP_sch_static_chunked;
+}
+
+bool CGOpenMPRuntime::isStaticChunked(
+ OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
+ OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+ return Schedule == OMP_dist_sch_static_chunked;
+}
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
OpenMPSchedType Schedule =
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 3fb1a1538fa..0cc1056ffab 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -890,6 +890,20 @@ public:
virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,
bool Chunked) const;
+ /// Check if the specified \a ScheduleKind is static chunked.
+ /// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
+ /// \param Chunked True if chunk is specified in the clause.
+ ///
+ virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) const;
+
+ /// Check if the specified \a ScheduleKind is static chunked.
+ /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause.
+ /// \param Chunked True if chunk is specified in the clause.
+ ///
+ virtual bool isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,
+ bool Chunked) const;
+
/// Check if the specified \a ScheduleKind is dynamic.
/// This kind of worksharing directive is emitted without outer loop.
/// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
@@ -1506,7 +1520,7 @@ public:
/// schedule clause.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
- llvm::Value *&Chunk) const {}
+ const Expr *&ChunkExpr) const {}
/// Emits call of the outlined function with the provided arguments,
/// translating these arguments to correct target-specific arguments.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index c8dd1a6f7b6..101ab54d585 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4247,8 +4247,11 @@ void CGOpenMPRuntimeNVPTX::getDefaultDistScheduleAndChunk(
void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk(
CodeGenFunction &CGF, const OMPLoopDirective &S,
OpenMPScheduleClauseKind &ScheduleKind,
- llvm::Value *&Chunk) const {
+ const Expr *&ChunkExpr) const {
ScheduleKind = OMPC_SCHEDULE_static;
- Chunk = CGF.Builder.getIntN(CGF.getContext().getTypeSize(
- S.getIterationVariable()->getType()), 1);
+ // Chunk size is 1 in this case.
+ llvm::APInt ChunkSize(32, 1);
+ ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,
+ CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+ SourceLocation());
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index d40e6cfbdd1..1ce8a175dae 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -348,7 +348,7 @@ public:
/// Choose a default value for the schedule clause.
void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
- llvm::Value *&Chunk) const override;
+ const Expr *&ChunkExpr) const override;
private:
/// Track the execution mode when codegening directives within a target
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index c8937956f45..718b8c326dc 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2006,7 +2006,7 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
// for combined 'distribute' and 'for' the increment expression of distribute
- // is store in DistInc. For 'distribute' alone, it is in Inc.
+ // is stored in DistInc. For 'distribute' alone, it is in Inc.
Expr *IncExpr;
if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
IncExpr = S.getDistInc();
@@ -2298,22 +2298,28 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
(void)LoopScope.Privatize();
// Detect the loop schedule kind and chunk.
- llvm::Value *Chunk = nullptr;
+ const Expr *ChunkExpr = nullptr;
OpenMPScheduleTy ScheduleKind;
if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
ScheduleKind.Schedule = C->getScheduleKind();
ScheduleKind.M1 = C->getFirstScheduleModifier();
ScheduleKind.M2 = C->getSecondScheduleModifier();
- if (const Expr *Ch = C->getChunkSize()) {
- Chunk = EmitScalarExpr(Ch);
- Chunk = EmitScalarConversion(Chunk, Ch->getType(),
- S.getIterationVariable()->getType(),
- S.getBeginLoc());
- }
+ ChunkExpr = C->getChunkSize();
} else {
// Default behaviour for schedule clause.
CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
- *this, S, ScheduleKind.Schedule, Chunk);
+ *this, S, ScheduleKind.Schedule, ChunkExpr);
+ }
+ bool HasChunkSizeOne = false;
+ llvm::Value *Chunk = nullptr;
+ if (ChunkExpr) {
+ Chunk = EmitScalarExpr(ChunkExpr);
+ Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
+ S.getIterationVariable()->getType(),
+ S.getBeginLoc());
+ llvm::APSInt EvaluatedChunk;
+ if (ChunkExpr->EvaluateAsInt(EvaluatedChunk, getContext()))
+ HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
}
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
@@ -2321,8 +2327,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
// If the static schedule kind is specified or if the ordered clause is
// specified, and if no monotonic modifier is specified, the effect will
// be as if the monotonic modifier was specified.
- if (RT.isStaticNonchunked(ScheduleKind.Schedule,
- /* Chunked */ Chunk != nullptr) &&
+ bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
+ /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
+ isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
+ if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
+ /* Chunked */ Chunk != nullptr) ||
+ StaticChunkedOne) &&
!Ordered) {
if (isOpenMPSimdDirective(S.getDirectiveKind()))
EmitOMPSimdInit(S, /*IsMonotonic=*/true);
@@ -2333,23 +2343,38 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
// unspecified in this case.
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
- UB.getAddress(), ST.getAddress());
+ UB.getAddress(), ST.getAddress(),
+ StaticChunkedOne ? Chunk : nullptr);
RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
ScheduleKind, StaticInit);
JumpDest LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
// UB = min(UB, GlobalUB);
- EmitIgnoredExpr(S.getEnsureUpperBound());
+ if (!StaticChunkedOne)
+ EmitIgnoredExpr(S.getEnsureUpperBound());
// IV = LB;
EmitIgnoredExpr(S.getInit());
- // while (idx <= UB) { BODY; ++idx; }
- EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
- S.getInc(),
- [&S, LoopExit](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
- },
- [](CodeGenFunction &) {});
+ // For unchunked static schedule generate:
+ //
+ // while (idx <= UB) {
+ // BODY;
+ // ++idx;
+ // }
+ //
+ // For static schedule with chunk one:
+ //
+ // while (IV <= PrevUB) {
+ // BODY;
+ // IV += ST;
+ // }
+ EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+ StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(),
+ StaticChunkedOne ? S.getDistInc() : S.getInc(),
+ [&S, LoopExit](CodeGenFunction &CGF) {
+ CGF.EmitOMPLoopBody(S, LoopExit);
+ CGF.EmitStopPoint(&S);
+ },
+ [](CodeGenFunction &) {});
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
@@ -3345,13 +3370,18 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
// iteration space is divided into chunks that are approximately equal
// in size, and at most one chunk is distributed to each team of the
// league. The size of the chunks is unspecified in this case.
+ bool StaticChunked = RT.isStaticChunked(
+ ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+ isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
if (RT.isStaticNonchunked(ScheduleKind,
- /* Chunked */ Chunk != nullptr)) {
+ /* Chunked */ Chunk != nullptr) ||
+ StaticChunked) {
if (isOpenMPSimdDirective(S.getDirectiveKind()))
EmitOMPSimdInit(S, /*IsMonotonic=*/true);
CGOpenMPRuntime::StaticRTInput StaticInit(
IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
- LB.getAddress(), UB.getAddress(), ST.getAddress());
+ LB.getAddress(), UB.getAddress(), ST.getAddress(),
+ StaticChunked ? Chunk : nullptr);
RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
StaticInit);
JumpDest LoopExit =
@@ -3370,15 +3400,45 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
? S.getCombinedCond()
: S.getCond();
- // for distribute alone, codegen
- // while (idx <= UB) { BODY; ++idx; }
- // when combined with 'for' (e.g. as in 'distribute parallel for')
- // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+ if (StaticChunked)
+ Cond = S.getCombinedDistCond();
+
+ // For static unchunked schedules generate:
+ //
+ // 1. For distribute alone, codegen
+ // while (idx <= UB) {
+ // BODY;
+ // ++idx;
+ // }
+ //
+ // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
+ // while (idx <= UB) {
+ // <CodeGen rest of pragma>(LB, UB);
+ // idx += ST;
+ // }
+ //
+ // For static chunk one schedule generate:
+ //
+ // while (IV <= GlobalUB) {
+ // <CodeGen rest of pragma>(LB, UB);
+ // LB += ST;
+ // UB += ST;
+ // UB = min(UB, GlobalUB);
+ // IV = LB;
+ // }
+ //
EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
[&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
CodeGenLoop(CGF, S, LoopExit);
},
- [](CodeGenFunction &) {});
+ [&S, StaticChunked](CodeGenFunction &CGF) {
+ if (StaticChunked) {
+ CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
+ CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
+ CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+ CGF.EmitIgnoredExpr(S.getCombinedInit());
+ }
+ });
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
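
To summarize the structural change in EmitOMPWorksharingLoop and EmitOMPDistributeLoop above, here is a rough conceptual sketch (mirroring the comment blocks in the hunk, not actual compiler output; LB/UB/ST/IV/GlobalUB stand for the runtime-managed bounds, stride and iteration variable) of the distribute-level loop emitted for a combined construct under a static schedule with chunk size 1:

    // Conceptual shape only; the pragma body placeholder is not a real call.
    void distribute_static_chunk_one(int LB, int UB, int ST, int GlobalUB) {
      int IV = LB;
      while (IV <= GlobalUB) {               // CombinedDistCond
        // <CodeGen rest of pragma>(LB, UB): the outlined 'parallel for'
        // region runs over the current [LB, UB] chunk here.
        LB += ST;                            // CombinedNextLowerBound
        UB += ST;                            // CombinedNextUpperBound
        UB = UB > GlobalUB ? GlobalUB : UB;  // CombinedEnsureUpperBound
        IV = LB;                             // CombinedInit
      }
    }
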
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index a6d1a1199c0..e08e77603e9 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -410,7 +410,7 @@ public:
}
return IsDuplicate;
}
-
+
/// Set default data sharing attribute to none.
void setDefaultDSANone(SourceLocation Loc) {
assert(!isStackEmpty());
@@ -5296,6 +5296,12 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get())
: SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
NumIterations.get());
+ ExprResult CombDistCond;
+ if (isOpenMPLoopBoundSharingDirective(DKind)) {
+ CombDistCond =
+ SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), NumIterations.get());
+ }
+
ExprResult CombCond;
if (isOpenMPLoopBoundSharingDirective(DKind)) {
CombCond =
@@ -5370,7 +5376,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
// on PrevUB instead of NumIterations - used to implement 'for' when found
// in combination with 'distribute', like in 'distribute parallel for'
SourceLocation DistIncLoc = AStmt->getBeginLoc();
- ExprResult DistCond, DistInc, PrevEUB;
+ ExprResult DistCond, DistInc, PrevEUB, ParForInDistCond;
if (isOpenMPLoopBoundSharingDirective(DKind)) {
DistCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get());
assert(DistCond.isUsable() && "distribute cond expr was not built");
@@ -5393,6 +5399,11 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
PrevEUB = SemaRef.BuildBinOp(CurScope, DistIncLoc, BO_Assign, UB.get(),
CondOp.get());
PrevEUB = SemaRef.ActOnFinishFullExpr(PrevEUB.get());
+
+ // Build IV <= PrevUB to be used in 'parallel for' when it is combined with
+ // a distribute directive with schedule(static, 1).
+ ParForInDistCond =
+ SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), PrevUB.get());
}
// Build updates and final values of the loop counters.
@@ -5516,6 +5527,8 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
Built.DistCombinedFields.Cond = CombCond.get();
Built.DistCombinedFields.NLB = CombNextLB.get();
Built.DistCombinedFields.NUB = CombNextUB.get();
+ Built.DistCombinedFields.DistCond = CombDistCond.get();
+ Built.DistCombinedFields.ParForInDistCond = ParForInDistCond.get();
return NestedLoopCount;
}
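
In short, for loop-bound-sharing directives Sema now builds two additional conditions (sketched here with the variable names used above, not literal compiler output):

    CombDistCond     : IV <= NumIterations  // distribute exit test used when the schedule is chunked
    ParForInDistCond : IV <= PrevUB         // inner 'parallel for' exit test used when chunk size is 1
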
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 466dfa5f531..1ad33c5dc34 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -1875,6 +1875,8 @@ void ASTStmtReader::VisitOMPLoopDirective(OMPLoopDirective *D) {
D->setCombinedCond(Record.readSubExpr());
D->setCombinedNextLowerBound(Record.readSubExpr());
D->setCombinedNextUpperBound(Record.readSubExpr());
+ D->setCombinedDistCond(Record.readSubExpr());
+ D->setCombinedParForInDistCond(Record.readSubExpr());
}
SmallVector<Expr *, 4> Sub;
unsigned CollapsedNum = D->getCollapsedNumber();
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 3fce69142d3..e7f9f35dcc2 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -1876,6 +1876,8 @@ void ASTStmtWriter::VisitOMPLoopDirective(OMPLoopDirective *D) {
Record.AddStmt(D->getCombinedCond());
Record.AddStmt(D->getCombinedNextLowerBound());
Record.AddStmt(D->getCombinedNextUpperBound());
+ Record.AddStmt(D->getCombinedDistCond());
+ Record.AddStmt(D->getCombinedParForInDistCond());
}
for (auto I : D->counters()) {
Record.AddStmt(I);
diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
index a7700a3421a..6625ee06fa9 100644
--- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp
@@ -406,19 +406,27 @@ int main() {
for (int i = 0; i < n; ++i) {
a[i] = b[i] + c[i];
// LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
- // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
- // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
- // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
- // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: [[OMP_IV:%.+]] = alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: [[OMP_LB:%.+]] = alloca
+ // LAMBDA: [[OMP_UB:%.+]] = alloca
+ // LAMBDA: [[OMP_ST:%.+]] = alloca
- // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
// LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
- // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
- // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
// check EUB for distribute
// LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
- // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+ // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}
// LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
// LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
// LAMBDA-DAG: [[EUB_TRUE]]:
@@ -437,18 +445,10 @@ int main() {
// check exit condition
// LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
- // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
- // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
- // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
- // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
- // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
- // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
- // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
- // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+ // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
+ // LAMBDA-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+ // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+ // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
// check that PrevLB and PrevUB are passed to the 'for'
// LAMBDA: [[DIST_INNER_LOOP_BODY]]:
@@ -467,13 +467,6 @@ int main() {
// LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
// LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
// LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
- // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
-
- // LAMBDA: [[DIST_INNER_LOOP_END]]:
- // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
- // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
- // check NextLB and NextUB
// LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
// LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -482,10 +475,31 @@ int main() {
// LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
// LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
- // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
- // outer loop exit
- // LAMBDA: [[DIST_OUTER_LOOP_END]]:
+ // Update UB
+ // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+ // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
+ // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+ // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+ // LAMBDA-DAG: [[EUB_TRUE_1]]:
+ // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}}
+ // LAMBDA: br label %[[EUB_END_1:.+]]
+ // LAMBDA-DAG: [[EUB_FALSE_1]]:
+ // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+ // LAMBDA: br label %[[EUB_END_1]]
+ // LAMBDA-DAG: [[EUB_END_1]]:
+ // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+ // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+ // Store LB in IV
+ // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+ // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+ // LAMBDA: [[DIST_INNER_LOOP_END]]:
+ // LAMBDA: br label %[[LOOP_EXIT:.+]]
+
+ // loop exit
+ // LAMBDA: [[LOOP_EXIT]]:
// LAMBDA-DAG: call void @__kmpc_for_static_fini(
// LAMBDA: ret
@@ -1154,19 +1168,28 @@ int main() {
for (int i = 0; i < n; ++i) {
a[i] = b[i] + c[i];
// CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
- // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
- // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
- // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
- // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: [[OMP_IV:%.+]] = alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: [[OMP_LB:%.+]] = alloca
+ // CHECK: [[OMP_UB:%.+]] = alloca
+ // CHECK: [[OMP_ST:%.+]] = alloca
// unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
- // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
- // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
// check EUB for distribute
// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
- // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+ // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}
// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
// CHECK-DAG: [[EUB_TRUE]]:
@@ -1185,18 +1208,10 @@ int main() {
// check exit condition
// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
- // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
- // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
- // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
- // CHECK: [[DIST_OUTER_LOOP_BODY]]:
- // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
- // CHECK: [[DIST_INNER_LOOP_HEADER]]:
- // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
- // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
- // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
- // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+ // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
+ // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+ // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+ // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
// check that PrevLB and PrevUB are passed to the 'for'
// CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1215,13 +1230,6 @@ int main() {
// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
- // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
- // CHECK: [[DIST_INNER_LOOP_END]]:
- // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
- // CHECK: [[DIST_OUTER_LOOP_INC]]:
- // check NextLB and NextUB
// CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
// CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -1230,10 +1238,31 @@ int main() {
// CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
// CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
- // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
- // outer loop exit
- // CHECK: [[DIST_OUTER_LOOP_END]]:
+ // Update UB
+ // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+ // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
+ // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+ // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+ // CHECK-DAG: [[EUB_TRUE_1]]:
+ // CHECK: [[NUM_IT_3:%.+]] = load{{.+}}
+ // CHECK: br label %[[EUB_END_1:.+]]
+ // CHECK-DAG: [[EUB_FALSE_1]]:
+ // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+ // CHECK: br label %[[EUB_END_1]]
+ // CHECK-DAG: [[EUB_END_1]]:
+ // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+ // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+ // Store LB in IV
+ // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+ // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+ // CHECK: [[DIST_INNER_LOOP_END]]:
+ // CHECK: br label %[[LOOP_EXIT:.+]]
+
+ // loop exit
+ // CHECK: [[LOOP_EXIT]]:
// CHECK-DAG: call void @__kmpc_for_static_fini(
// CHECK: ret
@@ -1867,19 +1896,28 @@ int main() {
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
// CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
-// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
-// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
-// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
-// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: [[OMP_IV:%.+]] = alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: [[OMP_LB:%.+]] = alloca
+// CHECK: [[OMP_UB:%.+]] = alloca
+// CHECK: [[OMP_ST:%.+]] = alloca
// unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
-// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
// check EUB for distribute
// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}}
// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
// CHECK-DAG: [[EUB_TRUE]]:
@@ -1898,18 +1936,10 @@ int main() {
// check exit condition
// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_BODY]]:
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[DIST_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
+// CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
// check that PrevLB and PrevUB are passed to the 'for'
// CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1928,13 +1958,6 @@ int main() {
// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-// CHECK: [[DIST_INNER_LOOP_END]]:
-// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_INC]]:
-// check NextLB and NextUB
// CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
// CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -1943,10 +1966,31 @@ int main() {
// CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
// CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
-// outer loop exit
-// CHECK: [[DIST_OUTER_LOOP_END]]:
+// Update UB
+// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
+// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+// CHECK-DAG: [[EUB_TRUE_1]]:
+// CHECK: [[NUM_IT_3:%.+]] = load{{.+}}
+// CHECK: br label %[[EUB_END_1:.+]]
+// CHECK-DAG: [[EUB_FALSE_1]]:
+// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END_1]]
+// CHECK-DAG: [[EUB_END_1]]:
+// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+// Store LB in IV
+// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
+// CHECK: br label %[[LOOP_EXIT:.+]]
+
+// loop exit
+// CHECK: [[LOOP_EXIT]]:
// CHECK-DAG: call void @__kmpc_for_static_fini(
// CHECK: ret
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
index 129d90b746c..4d20d06e6ec 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
@@ -405,19 +405,27 @@ int main() {
for (int i = 0; i < n; ++i) {
a[i] = b[i] + c[i];
// LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
- // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
- // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
- // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
- // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: [[OMP_IV:%.+]] = alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: alloca
+ // LAMBDA: [[OMP_LB:%.+]] = alloca
+ // LAMBDA: [[OMP_UB:%.+]] = alloca
+ // LAMBDA: [[OMP_ST:%.+]] = alloca
- // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
// LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
- // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
- // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
// check EUB for distribute
// LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
- // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+ // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}
// LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
// LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
// LAMBDA-DAG: [[EUB_TRUE]]:
@@ -436,18 +444,10 @@ int main() {
// check exit condition
// LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
- // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
- // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
- // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
- // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
- // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
- // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
- // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
- // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
- // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+ // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
+ // LAMBDA-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+ // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+ // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
// check that PrevLB and PrevUB are passed to the 'for'
// LAMBDA: [[DIST_INNER_LOOP_BODY]]:
@@ -466,13 +466,6 @@ int main() {
// LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
// LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
// LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
- // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
-
- // LAMBDA: [[DIST_INNER_LOOP_END]]:
- // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
- // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
- // check NextLB and NextUB
// LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
// LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -481,10 +474,31 @@ int main() {
// LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
// LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
- // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
- // outer loop exit
- // LAMBDA: [[DIST_OUTER_LOOP_END]]:
+ // Update UB
+ // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+ // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
+ // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+ // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+ // LAMBDA-DAG: [[EUB_TRUE_1]]:
+ // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}}
+ // LAMBDA: br label %[[EUB_END_1:.+]]
+ // LAMBDA-DAG: [[EUB_FALSE_1]]:
+ // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+ // LAMBDA: br label %[[EUB_END_1]]
+ // LAMBDA-DAG: [[EUB_END_1]]:
+ // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+ // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+ // Store LB in IV
+ // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+ // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+ // LAMBDA: [[DIST_INNER_LOOP_END]]:
+ // LAMBDA: br label %[[LOOP_EXIT:.+]]
+
+ // loop exit
+ // LAMBDA: [[LOOP_EXIT]]:
// LAMBDA-DAG: call void @__kmpc_for_static_fini(
// LAMBDA: ret
@@ -1153,19 +1167,28 @@ int main() {
for (int i = 0; i < n; ++i) {
a[i] = b[i] + c[i];
// CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
- // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
- // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
- // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
- // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: [[OMP_IV:%.+]] = alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: alloca
+ // CHECK: [[OMP_LB:%.+]] = alloca
+ // CHECK: [[OMP_UB:%.+]] = alloca
+ // CHECK: [[OMP_ST:%.+]] = alloca
// unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
- // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
- // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
// check EUB for distribute
// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
- // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+ // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}
// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
// CHECK-DAG: [[EUB_TRUE]]:
@@ -1184,18 +1207,10 @@ int main() {
// check exit condition
// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
- // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
- // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
- // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
- // CHECK: [[DIST_OUTER_LOOP_BODY]]:
- // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
- // CHECK: [[DIST_INNER_LOOP_HEADER]]:
- // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
- // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
- // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
- // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+ // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
+ // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+ // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+ // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
// check that PrevLB and PrevUB are passed to the 'for'
// CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1214,13 +1229,6 @@ int main() {
// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
- // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
- // CHECK: [[DIST_INNER_LOOP_END]]:
- // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
- // CHECK: [[DIST_OUTER_LOOP_INC]]:
- // check NextLB and NextUB
// CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
// CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -1229,10 +1237,31 @@ int main() {
// CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
// CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
- // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
- // outer loop exit
- // CHECK: [[DIST_OUTER_LOOP_END]]:
+ // Update UB
+ // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+ // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
+ // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+ // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+ // CHECK-DAG: [[EUB_TRUE_1]]:
+ // CHECK: [[NUM_IT_3:%.+]] = load{{.+}}
+ // CHECK: br label %[[EUB_END_1:.+]]
+ // CHECK-DAG: [[EUB_FALSE_1]]:
+ // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+ // CHECK: br label %[[EUB_END_1]]
+ // CHECK-DAG: [[EUB_END_1]]:
+ // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+ // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+ // Store LB in IV
+ // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+ // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+ // CHECK: [[DIST_INNER_LOOP_END]]:
+ // CHECK: br label %[[LOOP_EXIT:.+]]
+
+ // loop exit
+ // CHECK: [[LOOP_EXIT]]:
// CHECK-DAG: call void @__kmpc_for_static_fini(
// CHECK: ret
@@ -1866,19 +1895,28 @@ int main() {
// CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
// CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
-// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
-// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
-// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
-// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: [[OMP_IV:%.+]] = alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: [[OMP_LB:%.+]] = alloca
+// CHECK: [[OMP_UB:%.+]] = alloca
+// CHECK: [[OMP_ST:%.+]] = alloca
// unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
-// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
// check EUB for distribute
// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}}
// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], [[NUM_IT_1]]
// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
// CHECK-DAG: [[EUB_TRUE]]:
@@ -1897,18 +1935,10 @@ int main() {
// check exit condition
// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_BODY]]:
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[DIST_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
+// CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
// check that PrevLB and PrevUB are passed to the 'for'
// CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1927,13 +1957,6 @@ int main() {
// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-// CHECK: [[DIST_INNER_LOOP_END]]:
-// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_INC]]:
-// check NextLB and NextUB
// CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
// CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -1942,10 +1965,31 @@ int main() {
// CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
// CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
// CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
-// outer loop exit
-// CHECK: [[DIST_OUTER_LOOP_END]]:
+// Update UB
+// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
+// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+// CHECK-DAG: [[EUB_TRUE_1]]:
+// CHECK: [[NUM_IT_3:%.+]] = load{{.+}}
+// CHECK: br label %[[EUB_END_1:.+]]
+// CHECK-DAG: [[EUB_FALSE_1]]:
+// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END_1]]
+// CHECK-DAG: [[EUB_END_1]]:
+// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+// Store LB in IV
+// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
+// CHECK: br label %[[LOOP_EXIT:.+]]
+
+// loop exit
+// CHECK: [[LOOP_EXIT]]:
// CHECK-DAG: call void @__kmpc_for_static_fini(
// CHECK: ret
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
index 01c753355cd..df284b9a47a 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -112,13 +112,89 @@ int bar(int n){
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: ret void
+// Distribute with collapse(2)
// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: [[OMP_IV:%.+]] = alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: [[OMP_LB:%.+]] = alloca
+// CHECK: [[OMP_UB:%.+]] = alloca
+// CHECK: [[OMP_ST:%.+]] = alloca
// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
-// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
+
+// check EUB for distribute
+// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}} [[OMP_UB_VAL_1]], 99
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
+// CHECK-DAG: [[EUB_TRUE]]:
+// CHECK: br label %[[EUB_END:.+]]
+// CHECK-DAG: [[EUB_FALSE]]:
+// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END]]
+// CHECK-DAG: [[EUB_END]]:
+// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ 99, %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
+// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
+
+// initialize omp.iv
+// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
+// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
+
+// check exit condition
+// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
+// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], 100
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+
+// check that PrevLB and PrevUB are passed to the 'for'
+// CHECK: [[DIST_INNER_LOOP_BODY]]:
+// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
+// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
+// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
+// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
+
+// check that distlb and distub are properly passed to the outlined function
+// CHECK-32: {{call|invoke}} void [[OUTL4:@.+]]({{.*}} i32 [[OMP_PREV_LB]], i32 [[OMP_PREV_UB]]
+// CHECK-64: {{call|invoke}} void [[OUTL4:@.+]]({{.*}} i64 [[OMP_PREV_LB_EXT]], i64 [[OMP_PREV_UB_EXT]]
+
+// check DistInc
+// CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
+// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
+// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
+// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
+// CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
+// CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
+// CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
+// CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
+// CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
+// CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
+
+// Update UB
+// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], 99
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+// CHECK-DAG: [[EUB_TRUE_1]]:
+// CHECK: br label %[[EUB_END_1:.+]]
+// CHECK-DAG: [[EUB_FALSE_1]]:
+// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END_1]]
+// CHECK-DAG: [[EUB_END_1]]:
+// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ 99, %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+// Store LB in IV
+// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
// CHECK: call void @__kmpc_for_static_fini(
// CHECK: call void @__kmpc_spmd_kernel_deinit()
// CHECK: ret void