summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPredication.cpp258
-rw-r--r--llvm/test/Transforms/LoopPredication/basic.ll344
-rw-r--r--llvm/test/Transforms/LoopPredication/nested.ll83
-rw-r--r--llvm/test/Transforms/LoopPredication/visited.ll5
4 files changed, 506 insertions, 184 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 9b12ba18044..84577dd182a 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -34,6 +34,120 @@
// else
// deoptimize
//
+// It's tempting to rely on SCEV here, but it has proven to be problematic.
+// Generally the facts SCEV provides about the increment step of add
+// recurrences are true if the backedge of the loop is taken, which implicitly
+// assumes that the guard doesn't fail. Using these facts to optimize the
+// guard results in a circular logic where the guard is optimized under the
+// assumption that it never fails.
+//
+// For example, in the loop below the induction variable will be marked as nuw
+// basing on the guard. Basing on nuw the guard predicate will be considered
+// monotonic. Given a monotonic condition it's tempting to replace the induction
+// variable in the condition with its value on the last iteration. But this
+// transformation is not correct, e.g. e = 4, b = 5 breaks the loop.
+//
+// for (int i = b; i != e; i++)
+// guard(i u< len)
+//
+// One of the ways to reason about this problem is to use an inductive proof
+// approach. Given the loop:
+//
+// if (B(Start)) {
+// do {
+// I = PHI(Start, I.INC)
+// I.INC = I + Step
+// guard(G(I));
+// } while (B(I.INC));
+// }
+//
+// where B(x) and G(x) are predicates that map integers to booleans, we want a
+// loop invariant expression M such the following program has the same semantics
+// as the above:
+//
+// if (B(Start)) {
+// do {
+// I = PHI(Start, I.INC)
+// I.INC = I + Step
+// guard(G(Start) && M);
+// } while (B(I.INC));
+// }
+//
+// One solution for M is M = forall X . (G(X) && B(X + Step)) => G(X + Step)
+//
+// Informal proof that the transformation above is correct:
+//
+// By the definition of guards we can rewrite the guard condition to:
+// G(I) && G(Start) && M
+//
+// Let's prove that for each iteration of the loop:
+// G(Start) && M => G(I)
+// And the condition above can be simplified to G(Start) && M.
+//
+// Induction base.
+// G(Start) && M => G(Start)
+//
+// Induction step. Assuming G(Start) && M => G(I) on the subsequent
+// iteration:
+//
+// B(I + Step) is true because it's the backedge condition.
+// G(I) is true because the backedge is guarded by this condition.
+//
+// So M = forall X . (G(X) && B(X + Step)) => G(X + Step) implies
+// G(I + Step).
+//
+// Note that we can use anything stronger than M, i.e. any condition which
+// implies M.
+//
+// For now the transformation is limited to the following case:
+// * The loop has a single latch with either ult or slt icmp condition.
+// * The step of the IV used in the latch condition is 1.
+// * The IV of the latch condition is the same as the post increment IV of the
+// guard condition.
+// * The guard condition is ult.
+//
+// In this case the latch is of the from:
+// ++i u< latchLimit or ++i s< latchLimit
+// and the guard is of the form:
+// i u< guardLimit
+//
+// For the unsigned latch comparison case M is:
+// forall X . X u< guardLimit && (X + 1) u< latchLimit =>
+// (X + 1) u< guardLimit
+//
+// This is true if latchLimit u<= guardLimit since then
+// (X + 1) u< latchLimit u<= guardLimit == (X + 1) u< guardLimit.
+//
+// So the widened condition is:
+// i.start u< guardLimit && latchLimit u<= guardLimit
+//
+// For the signed latch comparison case M is:
+// forall X . X u< guardLimit && (X + 1) s< latchLimit =>
+// (X + 1) u< guardLimit
+//
+// The only way the antecedent can be true and the consequent can be false is
+// if
+// X == guardLimit - 1
+// (and guardLimit is non-zero, but we won't use this latter fact).
+// If X == guardLimit - 1 then the second half of the antecedent is
+// guardLimit s< latchLimit
+// and its negation is
+// latchLimit s<= guardLimit.
+//
+// In other words, if latchLimit s<= guardLimit then:
+// (the ranges below are written in ConstantRange notation, where [A, B) is the
+// set for (I = A; I != B; I++ /*maywrap*/) yield(I);)
+//
+// forall X . X u< guardLimit && (X + 1) s< latchLimit => (X + 1) u< guardLimit
+// == forall X . X u< guardLimit && (X + 1) s< guardLimit => (X + 1) u< guardLimit
+// == forall X . X in [0, guardLimit) && (X + 1) in [INT_MIN, guardLimit) => (X + 1) in [0, guardLimit)
+// == forall X . X in [0, guardLimit) && X in [INT_MAX, guardLimit-1) => X in [-1, guardLimit-1)
+// == forall X . X in [0, guardLimit-1) => X in [-1, guardLimit-1)
+// == true
+//
+// So the widened condition is:
+// i.start u< guardLimit && latchLimit s<= guardLimit
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopPredication.h"
@@ -75,8 +189,16 @@ class LoopPredication {
Loop *L;
const DataLayout *DL;
BasicBlock *Preheader;
+ LoopICmp LatchCheck;
- Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI);
+ Optional<LoopICmp> parseLoopICmp(ICmpInst *ICI) {
+ return parseLoopICmp(ICI->getPredicate(), ICI->getOperand(0),
+ ICI->getOperand(1));
+ }
+ Optional<LoopICmp> parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
+ Value *RHS);
+
+ Optional<LoopICmp> parseLoopLatchICmp();
Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder,
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
@@ -135,11 +257,8 @@ PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
}
Optional<LoopPredication::LoopICmp>
-LoopPredication::parseLoopICmp(ICmpInst *ICI) {
- ICmpInst::Predicate Pred = ICI->getPredicate();
-
- Value *LHS = ICI->getOperand(0);
- Value *RHS = ICI->getOperand(1);
+LoopPredication::parseLoopICmp(ICmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
const SCEV *LHSS = SE->getSCEV(LHS);
if (isa<SCEVCouldNotCompute>(LHSS))
return None;
@@ -165,6 +284,8 @@ Value *LoopPredication::expandCheck(SCEVExpander &Expander,
IRBuilder<> &Builder,
ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS, Instruction *InsertAt) {
+ // TODO: we can check isLoopEntryGuardedByCond before emitting the check
+
Type *Ty = LHS->getType();
assert(Ty == RHS->getType() && "expandCheck operands have different types?");
Value *LHSV = Expander.expandCodeFor(LHS, Ty, InsertAt);
@@ -181,51 +302,54 @@ Optional<Value *> LoopPredication::widenICmpRangeCheck(ICmpInst *ICI,
DEBUG(dbgs() << "Analyzing ICmpInst condition:\n");
DEBUG(ICI->dump());
+ // parseLoopStructure guarantees that the latch condition is:
+ // ++i u< latchLimit or ++i s< latchLimit
+ // We are looking for the range checks of the form:
+ // i u< guardLimit
auto RangeCheck = parseLoopICmp(ICI);
if (!RangeCheck) {
DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
return None;
}
-
- ICmpInst::Predicate Pred = RangeCheck->Pred;
- const SCEVAddRecExpr *IndexAR = RangeCheck->IV;
- const SCEV *RHSS = RangeCheck->Limit;
-
- auto CanExpand = [this](const SCEV *S) {
- return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
- };
- if (!CanExpand(RHSS))
+ if (RangeCheck->Pred != ICmpInst::ICMP_ULT) {
+ DEBUG(dbgs() << "Unsupported range check predicate(" << RangeCheck->Pred
+ << ")!\n");
return None;
-
- DEBUG(dbgs() << "IndexAR: ");
- DEBUG(IndexAR->dump());
-
- bool IsIncreasing = false;
- if (!SE->isMonotonicPredicate(IndexAR, Pred, IsIncreasing))
- return None;
-
- // If the predicate is increasing the condition can change from false to true
- // as the loop progresses, in this case take the value on the first iteration
- // for the widened check. Otherwise the condition can change from true to
- // false as the loop progresses, so take the value on the last iteration.
- const SCEV *NewLHSS = IsIncreasing
- ? IndexAR->getStart()
- : SE->getSCEVAtScope(IndexAR, L->getParentLoop());
- if (NewLHSS == IndexAR) {
- DEBUG(dbgs() << "Can't compute NewLHSS!\n");
+ }
+ auto *RangeCheckIV = RangeCheck->IV;
+ auto *PostIncRangeCheckIV = RangeCheckIV->getPostIncExpr(*SE);
+ if (LatchCheck.IV != PostIncRangeCheckIV) {
+ DEBUG(dbgs() << "Post increment range check IV (" << *PostIncRangeCheckIV
+ << ") is not the same as latch IV (" << *LatchCheck.IV
+ << ")!\n");
return None;
}
+ assert(RangeCheckIV->getStepRecurrence(*SE)->isOne() && "must be one");
+ const SCEV *Start = RangeCheckIV->getStart();
- DEBUG(dbgs() << "NewLHSS: ");
- DEBUG(NewLHSS->dump());
+ // Generate the widened condition. See the file header comment for reasoning.
+ // If the latch condition is unsigned:
+ // i.start u< guardLimit && latchLimit u<= guardLimit
+ // If the latch condition is signed:
+ // i.start u< guardLimit && latchLimit s<= guardLimit
- if (!CanExpand(NewLHSS))
- return None;
+ auto LimitCheckPred = ICmpInst::isSigned(LatchCheck.Pred)
+ ? ICmpInst::ICMP_SLE
+ : ICmpInst::ICMP_ULE;
- DEBUG(dbgs() << "NewLHSS is loop invariant and safe to expand. Expand!\n");
+ auto CanExpand = [this](const SCEV *S) {
+ return SE->isLoopInvariant(S, L) && isSafeToExpand(S, *SE);
+ };
+ if (!CanExpand(Start) || !CanExpand(LatchCheck.Limit) ||
+ !CanExpand(RangeCheck->Limit))
+ return None;
Instruction *InsertAt = Preheader->getTerminator();
- return expandCheck(Expander, Builder, Pred, NewLHSS, RHSS, InsertAt);
+ auto *FirstIterationCheck = expandCheck(Expander, Builder, RangeCheck->Pred,
+ Start, RangeCheck->Limit, InsertAt);
+ auto *LimitCheck = expandCheck(Expander, Builder, LimitCheckPred,
+ LatchCheck.Limit, RangeCheck->Limit, InsertAt);
+ return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
}
bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
@@ -288,6 +412,59 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
return true;
}
+Optional<LoopPredication::LoopICmp> LoopPredication::parseLoopLatchICmp() {
+ using namespace PatternMatch;
+
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
+ DEBUG(dbgs() << "The loop doesn't have a single latch!\n");
+ return None;
+ }
+
+ ICmpInst::Predicate Pred;
+ Value *LHS, *RHS;
+ BasicBlock *TrueDest, *FalseDest;
+
+ if (!match(LoopLatch->getTerminator(),
+ m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TrueDest,
+ FalseDest))) {
+ DEBUG(dbgs() << "Failed to match the latch terminator!\n");
+ return None;
+ }
+ assert((TrueDest == L->getHeader() || FalseDest == L->getHeader()) &&
+ "One of the latch's destinations must be the header");
+ if (TrueDest != L->getHeader())
+ Pred = ICmpInst::getInversePredicate(Pred);
+
+ auto Result = parseLoopICmp(Pred, LHS, RHS);
+ if (!Result) {
+ DEBUG(dbgs() << "Failed to parse the loop latch condition!\n");
+ return None;
+ }
+
+ if (Result->Pred != ICmpInst::ICMP_ULT &&
+ Result->Pred != ICmpInst::ICMP_SLT) {
+ DEBUG(dbgs() << "Unsupported loop latch predicate(" << Result->Pred
+ << ")!\n");
+ return None;
+ }
+
+ // Check affine first, so if it's not we don't try to compute the step
+ // recurrence.
+ if (!Result->IV->isAffine()) {
+ DEBUG(dbgs() << "The induction variable is not affine!\n");
+ return None;
+ }
+
+ auto *Step = Result->IV->getStepRecurrence(*SE);
+ if (!Step->isOne()) {
+ DEBUG(dbgs() << "Unsupported loop stride(" << *Step << ")!\n");
+ return None;
+ }
+
+ return Result;
+}
+
bool LoopPredication::runOnLoop(Loop *Loop) {
L = Loop;
@@ -308,6 +485,11 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
if (!Preheader)
return false;
+ auto LatchCheckOpt = parseLoopLatchICmp();
+ if (!LatchCheckOpt)
+ return false;
+ LatchCheck = *LatchCheckOpt;
+
// Collect all the guards into a vector and process later, so as not
// to invalidate the instruction iterator.
SmallVector<IntrinsicInst *, 4> Guards;
diff --git a/llvm/test/Transforms/LoopPredication/basic.ll b/llvm/test/Transforms/LoopPredication/basic.ll
index 6ce07819cb0..a4b4e742a10 100644
--- a/llvm/test/Transforms/LoopPredication/basic.ll
+++ b/llvm/test/Transforms/LoopPredication/basic.ll
@@ -11,8 +11,9 @@ entry:
loop.preheader:
; CHECK: loop.preheader:
-; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
-; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length
+; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
; CHECK-NEXT: br label %loop
br label %loop
@@ -46,8 +47,9 @@ entry:
loop.preheader:
; CHECK: loop.preheader:
-; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
-; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length
+; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
; CHECK-NEXT: br label %loop
br label %loop
@@ -73,44 +75,35 @@ exit:
ret i32 %result
}
-
-define i32 @two_range_checks(i32* %array.1, i32 %length.1,
- i32* %array.2, i32 %length.2, i32 %n) {
-; CHECK-LABEL: @two_range_checks
+define i32 @signed_loop_0_to_n_ult_check(i32* %array, i32 %length, i32 %n) {
+; CHECK-LABEL: @signed_loop_0_to_n_ult_check
entry:
- %tmp5 = icmp eq i32 %n, 0
+ %tmp5 = icmp sle i32 %n, 0
br i1 %tmp5, label %exit, label %loop.preheader
loop.preheader:
; CHECK: loop.preheader:
-; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
-; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}}
-; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2}}
+; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
; CHECK-NEXT: br label %loop
br label %loop
loop:
; CHECK: loop:
-; CHECK: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]]
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
%loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
%i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
- %within.bounds.1 = icmp ult i32 %i, %length.1
- %within.bounds.2 = icmp ult i32 %i, %length.2
- %within.bounds = and i1 %within.bounds.1, %within.bounds.2
+ %within.bounds = icmp ult i32 %i, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%i.i64 = zext i32 %i to i64
- %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64
- %array.1.i = load i32, i32* %array.1.i.ptr, align 4
- %loop.acc.1 = add i32 %loop.acc, %array.1.i
-
- %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64
- %array.2.i = load i32, i32* %array.2.i.ptr, align 4
- %loop.acc.next = add i32 %loop.acc.1, %array.2.i
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
%i.next = add nuw i32 %i, 1
- %continue = icmp ult i32 %i.next, %n
+ %continue = icmp slt i32 %i.next, %n
br i1 %continue, label %loop, label %exit
exit:
@@ -118,52 +111,33 @@ exit:
ret i32 %result
}
-define i32 @three_range_checks(i32* %array.1, i32 %length.1,
- i32* %array.2, i32 %length.2,
- i32* %array.3, i32 %length.3, i32 %n) {
-; CHECK-LABEL: @three_range_checks
+define i32 @unsupported_latch_pred_loop_0_to_n(i32* %array, i32 %length, i32 %n) {
+; CHECK-LABEL: @unsupported_latch_pred_loop_0_to_n
entry:
- %tmp5 = icmp eq i32 %n, 0
+ %tmp5 = icmp sle i32 %n, 0
br i1 %tmp5, label %exit, label %loop.preheader
loop.preheader:
; CHECK: loop.preheader:
-; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
-; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}}
-; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}}
-; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.{{1|2|3}}
; CHECK-NEXT: br label %loop
br label %loop
loop:
; CHECK: loop:
-; CHECK: [[wide_cond_and:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]]
-; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_and]], [[wide_cond_3]]
-; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+; CHECK: %within.bounds = icmp ult i32 %i, %length
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
%i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
- %within.bounds.1 = icmp ult i32 %i, %length.1
- %within.bounds.2 = icmp ult i32 %i, %length.2
- %within.bounds.3 = icmp ult i32 %i, %length.3
- %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2
- %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3
+ %within.bounds = icmp ult i32 %i, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%i.i64 = zext i32 %i to i64
- %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64
- %array.1.i = load i32, i32* %array.1.i.ptr, align 4
- %loop.acc.1 = add i32 %loop.acc, %array.1.i
-
- %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64
- %array.2.i = load i32, i32* %array.2.i.ptr, align 4
- %loop.acc.2 = add i32 %loop.acc.1, %array.2.i
-
- %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64
- %array.3.i = load i32, i32* %array.3.i.ptr, align 4
- %loop.acc.next = add i32 %loop.acc.2, %array.3.i
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
- %i.next = add nuw i32 %i, 1
- %continue = icmp ult i32 %i.next, %n
+ %i.next = add nsw i32 %i, 1
+ %continue = icmp ne i32 %i.next, %n
br i1 %continue, label %loop, label %exit
exit:
@@ -171,56 +145,33 @@ exit:
ret i32 %result
}
-define i32 @three_guards(i32* %array.1, i32 %length.1,
- i32* %array.2, i32 %length.2,
- i32* %array.3, i32 %length.3, i32 %n) {
-; CHECK-LABEL: @three_guards
+define i32 @signed_loop_0_to_n_unsupported_iv_step(i32* %array, i32 %length, i32 %n) {
+; CHECK-LABEL: @signed_loop_0_to_n_unsupported_iv_step
entry:
- %tmp5 = icmp eq i32 %n, 0
+ %tmp5 = icmp sle i32 %n, 0
br i1 %tmp5, label %exit, label %loop.preheader
loop.preheader:
; CHECK: loop.preheader:
-; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
-; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = icmp ult i32 [[max_index]], %length.1
-; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = icmp ult i32 [[max_index]], %length.2
-; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = icmp ult i32 [[max_index]], %length.3
; CHECK-NEXT: br label %loop
br label %loop
loop:
; CHECK: loop:
-; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_1]], i32 9) [ "deopt"() ]
-; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_2]], i32 9) [ "deopt"() ]
-; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_3]], i32 9) [ "deopt"() ]
-
+; CHECK: %within.bounds = icmp ult i32 %i, %length
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
%i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
-
- %within.bounds.1 = icmp ult i32 %i, %length.1
- call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.1, i32 9) [ "deopt"() ]
+ %within.bounds = icmp ult i32 %i, %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%i.i64 = zext i32 %i to i64
- %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64
- %array.1.i = load i32, i32* %array.1.i.ptr, align 4
- %loop.acc.1 = add i32 %loop.acc, %array.1.i
-
- %within.bounds.2 = icmp ult i32 %i, %length.2
- call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.2, i32 9) [ "deopt"() ]
-
- %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64
- %array.2.i = load i32, i32* %array.2.i.ptr, align 4
- %loop.acc.2 = add i32 %loop.acc.1, %array.2.i
-
- %within.bounds.3 = icmp ult i32 %i, %length.3
- call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.3, i32 9) [ "deopt"() ]
-
- %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64
- %array.3.i = load i32, i32* %array.3.i.ptr, align 4
- %loop.acc.next = add i32 %loop.acc.2, %array.3.i
+ %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
+ %array.i = load i32, i32* %array.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc, %array.i
- %i.next = add nuw i32 %i, 1
- %continue = icmp ult i32 %i.next, %n
+ %i.next = add nsw i32 %i, 2
+ %continue = icmp slt i32 %i.next, %n
br i1 %continue, label %loop, label %exit
exit:
@@ -228,15 +179,17 @@ exit:
ret i32 %result
}
-define i32 @signed_loop_start_to_n_sge_0_check(i32* %array, i32 %length, i32 %start, i32 %n) {
-; CHECK-LABEL: @signed_loop_start_to_n_sge_0_check
+define i32 @signed_loop_0_to_n_equal_iv_range_check(i32* %array, i32 %length, i32 %n) {
+; CHECK-LABEL: @signed_loop_0_to_n_equal_iv_range_check
entry:
- %tmp5 = icmp eq i32 %n, 0
+ %tmp5 = icmp sle i32 %n, 0
br i1 %tmp5, label %exit, label %loop.preheader
loop.preheader:
; CHECK: loop.preheader:
-; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp sge i32 %start, 0
+; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
; CHECK-NEXT: br label %loop
br label %loop
@@ -244,8 +197,10 @@ loop:
; CHECK: loop:
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
%loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
- %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
- %within.bounds = icmp sge i32 %i, 0
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+ %j = phi i32 [ %j.next, %loop ], [ 0, %loop.preheader ]
+
+ %within.bounds = icmp ult i32 %j, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%i.i64 = zext i32 %i to i64
@@ -253,6 +208,7 @@ loop:
%array.i = load i32, i32* %array.i.ptr, align 4
%loop.acc.next = add i32 %loop.acc, %array.i
+ %j.next = add nsw i32 %j, 1
%i.next = add nsw i32 %i, 1
%continue = icmp slt i32 %i.next, %n
br i1 %continue, label %loop, label %exit
@@ -262,28 +218,26 @@ exit:
ret i32 %result
}
-define i32 @signed_loop_start_to_n_upper_slt_length_check(i32* %array, i32 %length, i32 %start, i32 %n) {
-; CHECK-LABEL: @signed_loop_start_to_n_upper_slt_length_check
+define i32 @signed_loop_0_to_n_unrelated_iv_range_check(i32* %array, i32 %start, i32 %length, i32 %n) {
+; CHECK-LABEL: @signed_loop_0_to_n_unrelated_iv_range_check
entry:
%tmp5 = icmp sle i32 %n, 0
br i1 %tmp5, label %exit, label %loop.preheader
loop.preheader:
; CHECK: loop.preheader:
-; CHECK: [[start_1:[^ ]+]] = add i32 %start, 1
-; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]]
-; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]]
-; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1
-; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_index]], %length
; CHECK-NEXT: br label %loop
br label %loop
loop:
; CHECK: loop:
-; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+; CHECK: %within.bounds = icmp ult i32 %j, %length
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
- %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
- %within.bounds = icmp slt i32 %i, %length
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+ %j = phi i32 [ %j.next, %loop ], [ %start, %loop.preheader ]
+
+ %within.bounds = icmp ult i32 %j, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%i.i64 = zext i32 %i to i64
@@ -291,6 +245,7 @@ loop:
%array.i = load i32, i32* %array.i.ptr, align 4
%loop.acc.next = add i32 %loop.acc, %array.i
+ %j.next = add nsw i32 %j, 1
%i.next = add nsw i32 %i, 1
%continue = icmp slt i32 %i.next, %n
br i1 %continue, label %loop, label %exit
@@ -300,41 +255,166 @@ exit:
ret i32 %result
}
-define i32 @signed_loop_start_to_n_both_checks(i32* %array, i32 %length, i32 %start, i32 %n) {
-; CHECK-LABEL: @signed_loop_start_to_n_both_checks
+define i32 @two_range_checks(i32* %array.1, i32 %length.1,
+ i32* %array.2, i32 %length.2, i32 %n) {
+; CHECK-LABEL: @two_range_checks
entry:
- %tmp5 = icmp sle i32 %n, 0
+ %tmp5 = icmp eq i32 %n, 0
br i1 %tmp5, label %exit, label %loop.preheader
loop.preheader:
; CHECK: loop.preheader:
-; CHECK: [[lower_check:[^ ]+]] = icmp sge i32 %start, 0
-; CHECK-NEXT: [[start_1:[^ ]+]] = add i32 %start, 1
-; CHECK-NEXT: [[n_sgt_start_1:[^ ]+]] = icmp sgt i32 %n, [[start_1]]
-; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[n_sgt_start_1]], i32 %n, i32 [[start_1]]
-; CHECK-NEXT: [[max_index:[^ ]+]] = add i32 [[smax]], -1
-; CHECK-NEXT: [[upper_check:[^ ]+]] = icmp slt i32 [[max_index]], %length
+; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2}}
+; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2}}
+; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]]
+; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2}}
+; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2}}
+; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]]
; CHECK-NEXT: br label %loop
br label %loop
loop:
; CHECK: loop:
-; CHECK: [[wide_cond:[^ ]+]] = and i1 [[lower_check]], [[upper_check]]
-; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+; CHECK: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
%loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
- %i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
- %within.bounds.1 = icmp slt i32 %i, %length
- %within.bounds.2 = icmp sge i32 %i, 0
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+ %within.bounds.1 = icmp ult i32 %i, %length.1
+ %within.bounds.2 = icmp ult i32 %i, %length.2
%within.bounds = and i1 %within.bounds.1, %within.bounds.2
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%i.i64 = zext i32 %i to i64
- %array.i.ptr = getelementptr inbounds i32, i32* %array, i64 %i.i64
- %array.i = load i32, i32* %array.i.ptr, align 4
- %loop.acc.next = add i32 %loop.acc, %array.i
+ %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64
+ %array.1.i = load i32, i32* %array.1.i.ptr, align 4
+ %loop.acc.1 = add i32 %loop.acc, %array.1.i
- %i.next = add nsw i32 %i, 1
- %continue = icmp slt i32 %i.next, %n
+ %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64
+ %array.2.i = load i32, i32* %array.2.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc.1, %array.2.i
+
+ %i.next = add nuw i32 %i, 1
+ %continue = icmp ult i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+define i32 @three_range_checks(i32* %array.1, i32 %length.1,
+ i32* %array.2, i32 %length.2,
+ i32* %array.3, i32 %length.3, i32 %n) {
+; CHECK-LABEL: @three_range_checks
+entry:
+ %tmp5 = icmp eq i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}}
+; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}}
+; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]]
+; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}}
+; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}}
+; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]]
+; CHECK-NEXT: [[first_iteration_check_3:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}}
+; CHECK-NEXT: [[limit_check_3:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}}
+; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = and i1 [[first_iteration_check_3]], [[limit_check_3]]
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: [[wide_cond_and:[^ ]+]] = and i1 [[wide_cond_1]], [[wide_cond_2]]
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[wide_cond_and]], [[wide_cond_3]]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+ %within.bounds.1 = icmp ult i32 %i, %length.1
+ %within.bounds.2 = icmp ult i32 %i, %length.2
+ %within.bounds.3 = icmp ult i32 %i, %length.3
+ %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2
+ %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64
+ %array.1.i = load i32, i32* %array.1.i.ptr, align 4
+ %loop.acc.1 = add i32 %loop.acc, %array.1.i
+
+ %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64
+ %array.2.i = load i32, i32* %array.2.i.ptr, align 4
+ %loop.acc.2 = add i32 %loop.acc.1, %array.2.i
+
+ %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64
+ %array.3.i = load i32, i32* %array.3.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc.2, %array.3.i
+
+ %i.next = add nuw i32 %i, 1
+ %continue = icmp ult i32 %i.next, %n
+ br i1 %continue, label %loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ]
+ ret i32 %result
+}
+
+define i32 @three_guards(i32* %array.1, i32 %length.1,
+ i32* %array.2, i32 %length.2,
+ i32* %array.3, i32 %length.3, i32 %n) {
+; CHECK-LABEL: @three_guards
+entry:
+ %tmp5 = icmp eq i32 %n, 0
+ br i1 %tmp5, label %exit, label %loop.preheader
+
+loop.preheader:
+; CHECK: loop.preheader:
+; CHECK: [[first_iteration_check_1:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}}
+; CHECK-NEXT: [[limit_check_1:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}}
+; CHECK-NEXT: [[wide_cond_1:[^ ]+]] = and i1 [[first_iteration_check_1]], [[limit_check_1]]
+; CHECK-NEXT: [[first_iteration_check_2:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}}
+; CHECK-NEXT: [[limit_check_2:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}}
+; CHECK-NEXT: [[wide_cond_2:[^ ]+]] = and i1 [[first_iteration_check_2]], [[limit_check_2]]
+; CHECK-NEXT: [[first_iteration_check_3:[^ ]+]] = icmp ult i32 0, %length.{{1|2|3}}
+; CHECK-NEXT: [[limit_check_3:[^ ]+]] = icmp ule i32 %n, %length.{{1|2|3}}
+; CHECK-NEXT: [[wide_cond_3:[^ ]+]] = and i1 [[first_iteration_check_3]], [[limit_check_3]]
+; CHECK-NEXT: br label %loop
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_1]], i32 9) [ "deopt"() ]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_2]], i32 9) [ "deopt"() ]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond_3]], i32 9) [ "deopt"() ]
+
+ %loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
+ %i = phi i32 [ %i.next, %loop ], [ 0, %loop.preheader ]
+
+ %within.bounds.1 = icmp ult i32 %i, %length.1
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.1, i32 9) [ "deopt"() ]
+
+ %i.i64 = zext i32 %i to i64
+ %array.1.i.ptr = getelementptr inbounds i32, i32* %array.1, i64 %i.i64
+ %array.1.i = load i32, i32* %array.1.i.ptr, align 4
+ %loop.acc.1 = add i32 %loop.acc, %array.1.i
+
+ %within.bounds.2 = icmp ult i32 %i, %length.2
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.2, i32 9) [ "deopt"() ]
+
+ %array.2.i.ptr = getelementptr inbounds i32, i32* %array.2, i64 %i.i64
+ %array.2.i = load i32, i32* %array.2.i.ptr, align 4
+ %loop.acc.2 = add i32 %loop.acc.1, %array.2.i
+
+ %within.bounds.3 = icmp ult i32 %i, %length.3
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds.3, i32 9) [ "deopt"() ]
+
+ %array.3.i.ptr = getelementptr inbounds i32, i32* %array.3, i64 %i.i64
+ %array.3.i = load i32, i32* %array.3.i.ptr, align 4
+ %loop.acc.next = add i32 %loop.acc.2, %array.3.i
+
+ %i.next = add nuw i32 %i, 1
+ %continue = icmp ult i32 %i.next, %n
br i1 %continue, label %loop, label %exit
exit:
@@ -350,8 +430,9 @@ entry:
loop.preheader:
; CHECK: loop.preheader:
-; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
-; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], %length
+; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
; CHECK-NEXT: br label %loop
br label %loop
@@ -439,12 +520,12 @@ loop.preheader:
loop:
; CHECK: loop:
; CHECK: %bound = add i32 %i, %x
-; CHECK-NEXT: %within.bounds = icmp slt i32 %i, %bound
+; CHECK-NEXT: %within.bounds = icmp ult i32 %i, %bound
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%loop.acc = phi i32 [ %loop.acc.next, %loop ], [ 0, %loop.preheader ]
%i = phi i32 [ %i.next, %loop ], [ %start, %loop.preheader ]
%bound = add i32 %i, %x
- %within.bounds = icmp slt i32 %i, %bound
+ %within.bounds = icmp ult i32 %i, %bound
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%i.i64 = zext i32 %i to i64
@@ -503,9 +584,10 @@ entry:
loop.preheader:
; CHECK: loop.preheader:
-; CHECK: [[max_index:[^ ]+]] = add i32 %n, -1
-; CHECK-NEXT: [[length:[^ ]+]] = zext i16 %length.i16 to i32
-; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[max_index]], [[length]]
+; CHECK: [[length:[^ ]+]] = zext i16 %length.i16 to i32
+; CHECK-NEXT: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, [[length]]
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, [[length]]
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
; CHECK-NEXT: br label %loop
br label %loop
diff --git a/llvm/test/Transforms/LoopPredication/nested.ll b/llvm/test/Transforms/LoopPredication/nested.ll
index 6b40cde3e57..796839feec8 100644
--- a/llvm/test/Transforms/LoopPredication/nested.ll
+++ b/llvm/test/Transforms/LoopPredication/nested.ll
@@ -10,8 +10,6 @@ entry:
br i1 %tmp5, label %exit, label %outer.loop.preheader
outer.loop.preheader:
-; CHECK: outer.loop.preheader:
-; CHECK: [[iteration_count:[^ ]+]] = add i32 %l, -1
br label %outer.loop
outer.loop:
@@ -22,7 +20,10 @@ outer.loop:
inner.loop.preheader:
; CHECK: inner.loop.preheader:
-; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length
+; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %l, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
+; CHECK-NEXT: br label %inner.loop
br label %inner.loop
inner.loop:
@@ -31,7 +32,7 @@ inner.loop:
%inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
%j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ]
- %within.bounds = icmp slt i32 %j, %length
+ %within.bounds = icmp ult i32 %j, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%j.i64 = zext i32 %j to i64
@@ -62,8 +63,10 @@ entry:
outer.loop.preheader:
; CHECK: outer.loop.preheader:
-; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1
-; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[iteration_count]], %length
+; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp sle i32 %n, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
+; CHECK-NEXT: br label %outer.loop
br label %outer.loop
outer.loop:
@@ -82,7 +85,7 @@ inner.loop:
%inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
%j = phi i32 [ %j.next, %inner.loop ], [ 0, %inner.loop.preheader ]
- %within.bounds = icmp slt i32 %i, %length
+ %within.bounds = icmp ult i32 %i, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%i.i64 = zext i32 %i to i64
@@ -112,14 +115,15 @@ entry:
br i1 %tmp5, label %exit, label %outer.loop.preheader
outer.loop.preheader:
+; CHECK: outer.loop.preheader:
+; CHECK-NEXT: [[first_iteration_check_outer:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[limit_check_outer:[^ ]+]] = icmp sle i32 %n, %length
+; CHECK-NEXT: [[wide_cond_outer:[^ ]+]] = and i1 [[first_iteration_check_outer]], [[limit_check_outer]]
+; CHECK-NEXT: br label %outer.loop
br label %outer.loop
outer.loop:
; CHECK: outer.loop:
-; CHECK: [[i_1:[^ ]+]] = add i32 %i, 1
-; CHECK-NEXT: [[l_sgt_i_1:[^ ]+]] = icmp sgt i32 %l, [[i_1]]
-; CHECK-NEXT: [[smax:[^ ]+]] = select i1 [[l_sgt_i_1]], i32 %l, i32 [[i_1]]
-; CHECK-NEXT: [[max_j:[^ ]+]] = add i32 [[smax]], -1
%outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
%i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
%tmp6 = icmp sle i32 %l, 0
@@ -127,16 +131,69 @@ outer.loop:
inner.loop.preheader:
; CHECK: inner.loop.preheader:
-; CHECK: [[wide_cond:[^ ]+]] = icmp slt i32 [[max_j]], %length
+; CHECK: [[limit_check_inner:[^ ]+]] = icmp sle i32 %l, %length
+; CHECK: br label %inner.loop
br label %inner.loop
inner.loop:
; CHECK: inner.loop:
+; CHECK: [[wide_cond:[^ ]+]] = and i1 [[limit_check_inner]], [[wide_cond_outer]]
; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[wide_cond]], i32 9) [ "deopt"() ]
%inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
%j = phi i32 [ %j.next, %inner.loop ], [ %i, %inner.loop.preheader ]
- %within.bounds = icmp slt i32 %j, %length
+ %within.bounds = icmp ult i32 %j, %length
+ call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+ %j.i64 = zext i32 %j to i64
+ %array.j.ptr = getelementptr inbounds i32, i32* %array, i64 %j.i64
+ %array.j = load i32, i32* %array.j.ptr, align 4
+ %inner.loop.acc.next = add i32 %inner.loop.acc, %array.j
+
+ %j.next = add nsw i32 %j, 1
+ %inner.continue = icmp slt i32 %j.next, %l
+ br i1 %inner.continue, label %inner.loop, label %outer.loop.inc
+
+outer.loop.inc:
+ %outer.loop.acc.next = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %outer.loop ]
+ %i.next = add nsw i32 %i, 1
+ %outer.continue = icmp slt i32 %i.next, %n
+ br i1 %outer.continue, label %outer.loop, label %exit
+
+exit:
+ %result = phi i32 [ 0, %entry ], [ %outer.loop.acc.next, %outer.loop.inc ]
+ ret i32 %result
+}
+
+define i32 @cant_expand_guard_check_start(i32* %array, i32 %length, i32 %n, i32 %l, i32 %maybezero) {
+; CHECK-LABEL: @cant_expand_guard_check_start
+entry:
+ %tmp5 = icmp sle i32 %n, 0
+ br i1 %tmp5, label %exit, label %outer.loop.preheader
+
+outer.loop.preheader:
+ br label %outer.loop
+
+outer.loop:
+ %outer.loop.acc = phi i32 [ %outer.loop.acc.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+ %i = phi i32 [ %i.next, %outer.loop.inc ], [ 0, %outer.loop.preheader ]
+ %tmp6 = icmp sle i32 %l, 0
+ %div = udiv i32 %i, %maybezero
+ br i1 %tmp6, label %outer.loop.inc, label %inner.loop.preheader
+
+inner.loop.preheader:
+; CHECK: inner.loop.preheader:
+; CHECK: br label %inner.loop
+ br label %inner.loop
+
+inner.loop:
+; CHECK: inner.loop:
+; CHECK: %within.bounds = icmp ult i32 %j, %length
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+ %inner.loop.acc = phi i32 [ %inner.loop.acc.next, %inner.loop ], [ %outer.loop.acc, %inner.loop.preheader ]
+ %j = phi i32 [ %j.next, %inner.loop ], [ %div, %inner.loop.preheader ]
+
+ %within.bounds = icmp ult i32 %j, %length
call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
%j.i64 = zext i32 %j to i64
diff --git a/llvm/test/Transforms/LoopPredication/visited.ll b/llvm/test/Transforms/LoopPredication/visited.ll
index e9aae77f8e6..01feaeabd16 100644
--- a/llvm/test/Transforms/LoopPredication/visited.ll
+++ b/llvm/test/Transforms/LoopPredication/visited.ll
@@ -11,8 +11,9 @@ entry:
loop.preheader:
; CHECK: loop.preheader:
-; CHECK: [[iteration_count:[^ ]+]] = add i32 %n, -1
-; CHECK-NEXT: [[wide_cond:[^ ]+]] = icmp ult i32 [[iteration_count]], %length
+; CHECK: [[first_iteration_check:[^ ]+]] = icmp ult i32 0, %length
+; CHECK-NEXT: [[limit_check:[^ ]+]] = icmp ule i32 %n, %length
+; CHECK-NEXT: [[wide_cond:[^ ]+]] = and i1 [[first_iteration_check]], [[limit_check]]
; CHECK-NEXT: br label %loop
br label %loop
OpenPOWER on IntegriCloud