summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInterchange.cpp69
-rw-r--r--llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll108
2 files changed, 144 insertions, 33 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 2c4937b6bef..6ce2d06058c 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -717,22 +717,6 @@ bool LoopInterchangeLegality::findInductionAndReductions(
return true;
}
-static bool containsSafePHI(BasicBlock *Block, bool isOuterLoopExitBlock) {
- for (PHINode &PHI : Block->phis()) {
- // Reduction lcssa phi will have only 1 incoming block that from loop latch.
- if (PHI.getNumIncomingValues() > 1)
- return false;
- Instruction *Ins = dyn_cast<Instruction>(PHI.getIncomingValue(0));
- if (!Ins)
- return false;
- // Incoming value for lcssa phi's in outer loop exit can only be inner loop
- // exits lcssa phi else it would not be tightly nested.
- if (!isa<PHINode>(Ins) && isOuterLoopExitBlock)
- return false;
- }
- return true;
-}
-
// This function indicates the current limitations in the transform as a result
// of which we do not proceed.
bool LoopInterchangeLegality::currentLimitations() {
@@ -831,21 +815,6 @@ bool LoopInterchangeLegality::currentLimitations() {
return true;
}
- // TODO: We only handle LCSSA PHI's corresponding to reduction for now.
- BasicBlock *InnerExit = InnerLoop->getExitBlock();
- if (!containsSafePHI(InnerExit, false)) {
- LLVM_DEBUG(
- dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n");
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NoLCSSAPHIOuterInner",
- InnerLoop->getStartLoc(),
- InnerLoop->getHeader())
- << "Only inner loops with LCSSA PHIs can be interchange "
- "currently.";
- });
- return true;
- }
-
// TODO: Current limitation: Since we split the inner loop latch at the point
// were induction variable is incremented (induction.next); We cannot have
// more than 1 user of induction.next since it would result in broken code
@@ -921,6 +890,28 @@ bool LoopInterchangeLegality::currentLimitations() {
return false;
}
+// We currently only support LCSSA PHI nodes in the inner loop exit, if their
+// users are either reduction PHIs or PHIs outside the outer loop (which means
+// the we are only interested in the final value after the loop).
+static bool
+areInnerLoopExitPHIsSupported(Loop *InnerL, Loop *OuterL,
+ SmallPtrSetImpl<PHINode *> &Reductions) {
+ BasicBlock *InnerExit = OuterL->getUniqueExitBlock();
+ for (PHINode &PHI : InnerExit->phis()) {
+ // Reduction lcssa phi will have only 1 incoming block that from loop latch.
+ if (PHI.getNumIncomingValues() > 1)
+ return false;
+ if (any_of(PHI.users(), [&Reductions, OuterL](User *U) {
+ PHINode *PN = dyn_cast<PHINode>(U);
+ return !PN || (Reductions.find(PN) == Reductions.end() &&
+ OuterL->contains(PN->getParent()));
+ })) {
+ return false;
+ }
+ }
+ return true;
+}
+
// We currently support LCSSA PHI nodes in the outer loop exit, if their
// incoming values do not come from the outer loop latch or if the
// outer loop latch has a single predecessor. In that case, the value will
@@ -928,7 +919,7 @@ bool LoopInterchangeLegality::currentLimitations() {
// will still be true after interchanging. If we have multiple predecessor,
// that may not be the case, e.g. because the outer loop latch may be executed
// if the inner loop is not executed.
-static bool areLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
+static bool areOuterLoopExitPHIsSupported(Loop *OuterLoop, Loop *InnerLoop) {
BasicBlock *LoopNestExit = OuterLoop->getUniqueExitBlock();
for (PHINode &PHI : LoopNestExit->phis()) {
// FIXME: We currently are not able to detect floating point reductions
@@ -1013,7 +1004,19 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
return false;
}
- if (!areLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
+ if (!areInnerLoopExitPHIsSupported(OuterLoop, InnerLoop,
+ OuterInnerReductions)) {
+ LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop exit.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
+ InnerLoop->getStartLoc(),
+ InnerLoop->getHeader())
+ << "Found unsupported PHI node in loop exit.";
+ });
+ return false;
+ }
+
+ if (!areOuterLoopExitPHIsSupported(OuterLoop, InnerLoop)) {
LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in outer loop exit.\n");
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedExitPHI",
diff --git a/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll b/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll
new file mode 100644
index 00000000000..9ae5f42e1ae
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/pr43473-invalid-lcssa-phis-in-inner-exit.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-interchange -S < %s | FileCheck %s
+
+; Test cases for PR43473.
+
+; In the 2 test cases below, we have a LCSSA PHI in the inner loop exit, which
+; is used in the outer loop latch. This is not supported.
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
+; CHECK: outer.header:
+; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ]
+; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds double, double* undef, i64 [[OUTER_IV]]
+; CHECK-NEXT: br label [[INNER:%.*]]
+; CHECK: inner:
+; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[OUTER_HEADER]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[IDX]], align 8
+; CHECK-NEXT: store double undef, double* [[IDX]], align 8
+; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1
+; CHECK-NEXT: br i1 false, label [[INNER]], label [[OUTER_LATCH]]
+; CHECK: outer.latch:
+; CHECK-NEXT: [[INC43_LCSSA_WIDE_US:%.*]] = phi i64 [ [[INNER_IV_NEXT]], [[INNER]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INC43_LCSSA_WIDE_US]] to i32
+; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1
+; CHECK-NEXT: br i1 false, label [[OUTER_HEADER]], label [[OUTER_EXIT:%.*]]
+; CHECK: outer.exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %outer.header
+
+outer.header: ; preds = %for.cond26.for.end44_crit_edge.us, %entry
+ %outer.iv = phi i64 [ undef, %entry ], [ %outer.iv.next, %outer.latch ]
+ %idx = getelementptr inbounds double, double* undef, i64 %outer.iv
+ br label %inner
+
+inner: ; preds = %for.body28.us, %for.body25.us
+ %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner ]
+ %0 = load double, double* %idx, align 8
+ store double undef, double* %idx, align 8
+ %inner.iv.next = add nuw nsw i64 %inner.iv, 1
+ br i1 undef, label %inner, label %outer.latch
+
+outer.latch: ; preds = %inner
+ %inc43.lcssa.wide.us = phi i64 [ %inner.iv.next, %inner ]
+ %1 = trunc i64 %inc43.lcssa.wide.us to i32
+ %outer.iv.next = add nsw i64 %outer.iv, 1
+ br i1 undef, label %outer.header, label %outer.exit
+
+outer.exit: ; preds = %for.cond26.for.end44_crit_edge.us
+ ret void
+}
+
+; Same as @test1, but with a dedicated inner loop exit block.
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
+; CHECK: outer.header:
+; CHECK-NEXT: [[OUTER_IV:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ]
+; CHECK-NEXT: [[IDX:%.*]] = getelementptr inbounds double, double* undef, i64 [[OUTER_IV]]
+; CHECK-NEXT: br label [[INNER:%.*]]
+; CHECK: inner:
+; CHECK-NEXT: [[INNER_IV:%.*]] = phi i64 [ 0, [[OUTER_HEADER]] ], [ [[INNER_IV_NEXT:%.*]], [[INNER]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[IDX]], align 8
+; CHECK-NEXT: store double undef, double* [[IDX]], align 8
+; CHECK-NEXT: [[INNER_IV_NEXT]] = add nuw nsw i64 [[INNER_IV]], 1
+; CHECK-NEXT: br i1 false, label [[INNER]], label [[INNER_EXIT:%.*]]
+; CHECK: inner.exit:
+; CHECK-NEXT: [[INC43_LCSSA_WIDE_US:%.*]] = phi i64 [ [[INNER_IV_NEXT]], [[INNER]] ]
+; CHECK-NEXT: br label [[OUTER_LATCH]]
+; CHECK: outer.latch:
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INC43_LCSSA_WIDE_US]] to i32
+; CHECK-NEXT: [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1
+; CHECK-NEXT: br i1 false, label [[OUTER_HEADER]], label [[OUTER_EXIT:%.*]]
+; CHECK: outer.exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %outer.header
+
+outer.header: ; preds = %for.cond26.for.end44_crit_edge.us, %entry
+ %outer.iv = phi i64 [ undef, %entry ], [ %outer.iv.next, %outer.latch ]
+ %idx = getelementptr inbounds double, double* undef, i64 %outer.iv
+ br label %inner
+
+inner: ; preds = %for.body28.us, %for.body25.us
+ %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner ]
+ %0 = load double, double* %idx, align 8
+ store double undef, double* %idx, align 8
+ %inner.iv.next = add nuw nsw i64 %inner.iv, 1
+ br i1 undef, label %inner, label %inner.exit
+
+inner.exit:
+ %inc43.lcssa.wide.us = phi i64 [ %inner.iv.next, %inner ]
+ br label %outer.latch
+
+outer.latch: ; preds = %inner
+ %1 = trunc i64 %inc43.lcssa.wide.us to i32
+ %outer.iv.next = add nsw i64 %outer.iv, 1
+ br i1 undef, label %outer.header, label %outer.exit
+
+outer.exit: ; preds = %for.cond26.for.end44_crit_edge.us
+ ret void
+}
+
OpenPOWER on IntegriCloud