summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Analysis/ScalarEvolution.h3
-rw-r--r--llvm/lib/Analysis/ScalarEvolution.cpp21
-rw-r--r--llvm/test/Analysis/ScalarEvolution/unknown_phis.ll54
-rw-r--r--llvm/test/Transforms/IRCE/single-access-no-preloop.ll66
-rw-r--r--llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll46
5 files changed, 187 insertions, 3 deletions
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 0e600992981..d6f6a7032d7 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1099,6 +1099,9 @@ private:
/// Mark predicate values currently being processed by isImpliedCond.
SmallPtrSet<Value *, 6> PendingLoopPredicates;
+ /// Mark SCEVUnknown Phis currently being processed by getRangeRef.
+ SmallPtrSet<const PHINode *, 6> PendingPhiRanges;
+
/// Set to true by isLoopBackedgeGuardedByCond when we're walking the set of
/// conditions dominating the backedge of a loop.
bool WalkingBEDominatingConds = false;
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 170a3078c26..5aea3a4752e 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -5582,6 +5582,25 @@ ScalarEvolution::getRangeRef(const SCEV *S,
APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1));
}
+ // A range of Phi is a subset of union of all ranges of its input.
+ if (const PHINode *Phi = dyn_cast<PHINode>(U->getValue())) {
+ // Make sure that we do not run over cycled Phis.
+ if (PendingPhiRanges.insert(Phi).second) {
+ ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false);
+ for (auto &Op : Phi->operands()) {
+ auto OpRange = getRangeRef(getSCEV(Op), SignHint);
+ RangeFromOps = RangeFromOps.unionWith(OpRange);
+ // No point to continue if we already have a full set.
+ if (RangeFromOps.isFullSet())
+ break;
+ }
+ ConservativeResult = ConservativeResult.intersectWith(RangeFromOps);
+ bool Erased = PendingPhiRanges.erase(Phi);
+ assert(Erased && "Failed to erase Phi properly?");
+ (void) Erased;
+ }
+ }
+
return setRange(U, SignHint, std::move(ConservativeResult));
}
@@ -10888,6 +10907,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
ValueExprMap(std::move(Arg.ValueExprMap)),
PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
+ PendingPhiRanges(std::move(Arg.PendingPhiRanges)),
MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)),
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
@@ -10931,6 +10951,7 @@ ScalarEvolution::~ScalarEvolution() {
BTCI.second.clear();
assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
+ assert(PendingPhiRanges.empty() && "getRangeRef garbage");
assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
}
diff --git a/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll b/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll
new file mode 100644
index 00000000000..e5335b1ae91
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/unknown_phis.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+define void @merge_values_with_ranges(i32 *%a_len_ptr, i32 *%b_len_ptr, i1 %unknown_cond) {
+
+; CHECK-LABEL: Classifying expressions for: @merge_values_with_ranges
+; CHECK: %len = phi i32 [ %len_a, %if.true ], [ %len_b, %if.false ]
+; CHECK-NEXT: --> %len U: [0,2147483647) S: [0,2147483647)
+
+ entry:
+ br i1 %unknown_cond, label %if.true, label %if.false
+
+if.true:
+ %len_a = load i32, i32* %a_len_ptr, !range !0
+ br label %merge
+
+if.false:
+ %len_b = load i32, i32* %b_len_ptr, !range !0
+ br label %merge
+
+merge:
+ %len = phi i32 [ %len_a, %if.true ], [ %len_b, %if.false ]
+ ret void
+}
+
+define void @merge_values_with_ranges_looped(i32 *%a_len_ptr, i32 *%b_len_ptr) {
+
+; TODO: We could be much smarter here. So far we just make sure that we do not
+; go into infinite loop analyzing these Phis.
+
+; CHECK-LABEL: Classifying expressions for: @merge_values_with_ranges_looped
+; CHECK: %p1 = phi i32 [ %len_a, %entry ], [ %p2, %loop ]
+; CHECK-NEXT: --> %p1 U: full-set S: full-set
+; CHECK: %p2 = phi i32 [ %len_b, %entry ], [ %p1, %loop ]
+; CHECK-NEXT: --> %p2 U: full-set S: full-set
+
+ entry:
+ %len_a = load i32, i32* %a_len_ptr, !range !0
+ %len_b = load i32, i32* %b_len_ptr, !range !0
+ br label %loop
+
+loop:
+ %p1 = phi i32 [ %len_a, %entry ], [ %p2, %loop ]
+ %p2 = phi i32 [ %len_b, %entry ], [ %p1, %loop ]
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add i32 %iv, 1
+ %loop.cond = icmp slt i32 %iv.next, 100
+ br i1 %loop.cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+
+!0 = !{i32 0, i32 2147483647}
diff --git a/llvm/test/Transforms/IRCE/single-access-no-preloop.ll b/llvm/test/Transforms/IRCE/single-access-no-preloop.ll
index c9da4e28950..81db916ba66 100644
--- a/llvm/test/Transforms/IRCE/single-access-no-preloop.ll
+++ b/llvm/test/Transforms/IRCE/single-access-no-preloop.ll
@@ -179,5 +179,71 @@ in.bounds2:
; CHECK-NOT: br i1 false
; CHECK-NOT: preloop
+define void @single_access_no_preloop_no_offset_phi_len(i32 *%arr, i32 *%a_len_ptr, i32 *%b_len_ptr, i32 %n, i1 %unknown_cond) {
+ entry:
+ br i1 %unknown_cond, label %if.true, label %if.false
+
+if.true:
+ %len_a = load i32, i32* %a_len_ptr, !range !0
+ br label %merge
+
+if.false:
+ %len_b = load i32, i32* %b_len_ptr, !range !0
+ br label %merge
+
+merge:
+ %len = phi i32 [ %len_a, %if.true ], [ %len_b, %if.false ]
+ %first.itr.check = icmp sgt i32 %n, 0
+ br i1 %first.itr.check, label %loop, label %exit
+
+ loop:
+ %idx = phi i32 [ 0, %merge ] , [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %abc = icmp slt i32 %idx, %len
+ br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+ in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx
+ store i32 0, i32* %addr
+ %next = icmp slt i32 %idx.next, %n
+ br i1 %next, label %loop, label %exit
+
+ out.of.bounds:
+ ret void
+
+ exit:
+ ret void
+}
+
+; CHECK-LABEL: @single_access_no_preloop_no_offset_phi_len(
+
+; CHECK: loop:
+; CHECK: br i1 true, label %in.bounds, label %out.of.bounds
+
+; CHECK: main.exit.selector:
+; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ]
+; CHECK-NEXT: [[continue:%[^ ]+]] = icmp slt i32 %idx.next.lcssa, %n
+; CHECK-NEXT: br i1 [[continue]], label %main.pseudo.exit, label %exit.loopexit
+
+; CHECK: main.pseudo.exit:
+; CHECK-NEXT: %idx.copy = phi i32 [ 0, %loop.preheader ], [ %idx.next.lcssa, %main.exit.selector ]
+; CHECK-NEXT: %indvar.end = phi i32 [ 0, %loop.preheader ], [ %idx.next.lcssa, %main.exit.selector ]
+; CHECK-NEXT: br label %postloop
+
+; CHECK: postloop:
+; CHECK-NEXT: br label %loop.postloop
+
+; CHECK: loop.postloop:
+; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.next.postloop, %in.bounds.postloop ], [ %idx.copy, %postloop ]
+; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 1
+; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %len
+; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds
+
+; CHECK: in.bounds.postloop:
+; CHECK-NEXT: %addr.postloop = getelementptr i32, i32* %arr, i32 %idx.postloop
+; CHECK-NEXT: store i32 0, i32* %addr.postloop
+; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.next.postloop, %n
+; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit
+
!0 = !{i32 0, i32 2147483647}
!1 = !{!"branch_weights", i32 64, i32 4}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll
index 456271ea1aa..f944d1f97da 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/consecutive-ptr-cg-bug.ll
@@ -29,11 +29,13 @@ target triple = "x86_64-unknown-linux-gnu"
; Verify that store is vectorized as stride-1 memory access.
-; CHECK: vector.body:
-; CHECK: store <4 x i32>
+; CHECK-LABEL: @test_01(
+; CHECK-NOT: vector.body:
+; This test was originally vectorized, but now SCEV is smart enough to prove
+; that its trip count is 1, so it gets ignored by vectorizer.
; Function Attrs: uwtable
-define void @test() {
+define void @test_01() {
br label %.outer
; <label>:1: ; preds = %2
@@ -66,3 +68,41 @@ define void @test() {
br i1 undef, label %2, label %._crit_edge.loopexit
}
+; After trip count is increased, the test gets vectorized.
+; CHECK-LABEL: @test_02(
+; CHECK: vector.body:
+; CHECK: store <4 x i32>
+
+; Function Attrs: uwtable
+define void @test_02() {
+ br label %.outer
+
+; <label>:1: ; preds = %2
+ ret void
+
+; <label>:2: ; preds = %._crit_edge.loopexit
+ %3 = add nsw i32 %.ph, -2
+ br i1 undef, label %1, label %.outer
+
+.outer: ; preds = %2, %0
+ %.ph = phi i32 [ %3, %2 ], [ 336, %0 ]
+ %.ph2 = phi i32 [ 62, %2 ], [ 110, %0 ]
+ %4 = and i32 %.ph, 30
+ %5 = add i32 %.ph2, 1
+ br label %6
+
+; <label>:6: ; preds = %6, %.outer
+ %7 = phi i32 [ %5, %.outer ], [ %13, %6 ]
+ %8 = phi i32 [ %.ph2, %.outer ], [ %7, %6 ]
+ %9 = add i32 %8, 2
+ %10 = zext i32 %9 to i64
+ %11 = getelementptr inbounds i32, i32 addrspace(1)* undef, i64 %10
+ %12 = ashr i32 undef, %4
+ store i32 %12, i32 addrspace(1)* %11, align 4
+ %13 = add i32 %7, 1
+ %14 = icmp sgt i32 %13, 610
+ br i1 %14, label %._crit_edge.loopexit, label %6
+
+._crit_edge.loopexit: ; preds = %._crit_edge.loopexit, %6
+ br i1 undef, label %2, label %._crit_edge.loopexit
+}
OpenPOWER on IntegriCloud