summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp 28
-rw-r--r-- llvm/test/Transforms/IndVarSimplify/post-inc-range.ll 114
2 files changed, 140 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index a1e27ac72bc..5bd1808ac7c 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -892,6 +892,10 @@ class WidenIV {
ScalarEvolution *SE;
DominatorTree *DT;
+ // Does the module have any calls to the llvm.experimental.guard intrinsic
+ // at all? If not we can avoid scanning instructions looking for guards.
+ bool HasGuards;
+
// Result
PHINode *WidePhi;
Instruction *WideInc;
@@ -938,13 +942,14 @@ class WidenIV {
public:
WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
ScalarEvolution *SEv, DominatorTree *DTree,
- SmallVectorImpl<WeakVH> &DI) :
+ SmallVectorImpl<WeakVH> &DI, bool HasGuards) :
OrigPhi(WI.NarrowIV),
WideType(WI.WidestNativeType),
LI(LInfo),
L(LI->getLoopFor(OrigPhi->getParent())),
SE(SEv),
DT(DTree),
+ HasGuards(HasGuards),
WidePhi(nullptr),
WideInc(nullptr),
WideIncExpr(nullptr),
@@ -1609,6 +1614,20 @@ void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
};
+ auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
+ if (!HasGuards)
+ return;
+
+ for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
+ Ctx->getParent()->rend())) {
+ Value *C = nullptr;
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
+ UpdateRangeFromCondition(C, /*TrueDest=*/true);
+ }
+ };
+
+ UpdateRangeFromGuards(NarrowUser);
+
BasicBlock *NarrowUserBB = NarrowUser->getParent();
// If NarrowUserBB is statically unreachable asking dominator queries may
// yield suprising results. (e.g. the block may not have a dom tree node)
@@ -1620,6 +1639,7 @@ void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
DTB = DTB->getIDom()) {
auto *BB = DTB->getBlock();
auto *TI = BB->getTerminator();
+ UpdateRangeFromGuards(TI);
auto *BI = dyn_cast<BranchInst>(TI);
if (!BI || !BI->isConditional())
@@ -1711,6 +1731,10 @@ void IndVarSimplify::simplifyAndExtend(Loop *L,
LoopInfo *LI) {
SmallVector<WideIVInfo, 8> WideIVs;
+ auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ bool HasGuards = GuardDecl && !GuardDecl->use_empty();
+
SmallVector<PHINode*, 8> LoopPhis;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
LoopPhis.push_back(cast<PHINode>(I));
@@ -1740,7 +1764,7 @@ void IndVarSimplify::simplifyAndExtend(Loop *L,
} while(!LoopPhis.empty());
for (; !WideIVs.empty(); WideIVs.pop_back()) {
- WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
+ WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
Changed = true;
LoopPhis.push_back(WidePhi);
diff --git a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll
index 717f0611a74..d859eb28e78 100644
--- a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll
+++ b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll
@@ -173,3 +173,117 @@ for.end:
exit:
ret void
}
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define void @test_guard_one_bb(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_guard_one_bb(
+; CHECK-NOT: trunc
+; CHECK-NOT: icmp slt i32
+
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.body ]
+ %within_limits = icmp ult i32 %i, 64
+ %i.i64 = zext i32 %i to i64
+ %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+ %val = load i32, i32* %arrayidx, align 4
+ call void(i1, ...) @llvm.experimental.guard(i1 %within_limits) [ "deopt"() ]
+ %i.inc = add nsw nuw i32 %i, 1
+ %cmp = icmp slt i32 %i.inc, %limit
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ br label %exit
+
+exit:
+ ret void
+}
+
+define void @test_guard_in_the_same_bb(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_guard_in_the_same_bb(
+; CHECK-NOT: trunc
+; CHECK-NOT: icmp slt i32
+
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+ %within_limits = icmp ult i32 %i, 64
+ %i.i64 = zext i32 %i to i64
+ %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+ %val = load i32, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc:
+ call void(i1, ...) @llvm.experimental.guard(i1 %within_limits) [ "deopt"() ]
+ %i.inc = add nsw nuw i32 %i, 1
+ %cmp = icmp slt i32 %i.inc, %limit
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ br label %exit
+
+exit:
+ ret void
+}
+
+define void @test_guard_in_idom(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_guard_in_idom(
+; CHECK-NOT: trunc
+; CHECK-NOT: icmp slt i32
+
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.inc ]
+ %within_limits = icmp ult i32 %i, 64
+ call void(i1, ...) @llvm.experimental.guard(i1 %within_limits) [ "deopt"() ]
+ %i.i64 = zext i32 %i to i64
+ %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+ %val = load i32, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc:
+ %i.inc = add nsw nuw i32 %i, 1
+ %cmp = icmp slt i32 %i.inc, %limit
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ br label %exit
+
+exit:
+ ret void
+}
+
+define void @test_guard_merge_ranges(i32* %base, i32 %limit, i32 %start) {
+; CHECK-LABEL: @test_guard_merge_ranges(
+; CHECK-NOT: trunc
+; CHECK-NOT: icmp slt i32
+
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %i = phi i32 [ %start, %for.body.lr.ph ], [ %i.inc, %for.body ]
+ %within_limits.1 = icmp ult i32 %i, 64
+ call void(i1, ...) @llvm.experimental.guard(i1 %within_limits.1) [ "deopt"() ]
+ %within_limits.2 = icmp ult i32 %i, 2147483647
+ call void(i1, ...) @llvm.experimental.guard(i1 %within_limits.2) [ "deopt"() ]
+ %i.i64 = zext i32 %i to i64
+ %arrayidx = getelementptr inbounds i32, i32* %base, i64 %i.i64
+ %val = load i32, i32* %arrayidx, align 4
+ %i.inc = add nsw nuw i32 %i, 1
+ %cmp = icmp slt i32 %i.inc, %limit
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ br label %exit
+
+exit:
+ ret void
+}
OpenPOWER on IntegriCloud