summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp107
-rw-r--r--llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll484
2 files changed, 499 insertions, 92 deletions
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 9c9ed2a02e3..1f3ac8af86c 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -949,19 +949,6 @@ static BasicBlock *buildClonedLoopBlocks(
AC.registerAssumption(II);
}
- // Remove the cloned parent as a predecessor of the cloned continue successor
- // if we did in fact clone it.
- auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
- if (auto *ClonedContinueSuccBB =
- cast_or_null<BasicBlock>(VMap.lookup(ContinueSuccBB)))
- ClonedContinueSuccBB->removePredecessor(ClonedParentBB,
- /*DontDeleteUselessPHIs*/ true);
- // Replace the cloned branch with an unconditional branch to the cloned
- // unswitched successor.
- auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
- ClonedParentBB->getTerminator()->eraseFromParent();
- BranchInst::Create(ClonedSuccBB, ClonedParentBB);
-
// Update any PHI nodes in the cloned successors of the skipped blocks to not
// have spurious incoming values.
for (auto *LoopBB : L.blocks())
@@ -971,6 +958,45 @@ static BasicBlock *buildClonedLoopBlocks(
for (PHINode &PN : ClonedSuccBB->phis())
PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
+ // Remove the cloned parent as a predecessor of any successor we ended up
+ // cloning other than the unswitched one.
+ auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
+ for (auto *SuccBB : successors(ParentBB)) {
+ if (SuccBB == UnswitchedSuccBB)
+ continue;
+
+ auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
+ if (!ClonedSuccBB)
+ continue;
+
+ ClonedSuccBB->removePredecessor(ClonedParentBB,
+ /*DontDeleteUselessPHIs*/ true);
+ }
+
+ // Replace the cloned branch with an unconditional branch to the cloned
+ // unswitched successor.
+ auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
+ ClonedParentBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(ClonedSuccBB, ClonedParentBB);
+
+ // If there are duplicate entries in the PHI nodes because of multiple edges
+ // to the unswitched successor, we need to nuke all but one as we replaced it
+ // with a direct branch.
+ for (PHINode &PN : ClonedSuccBB->phis()) {
+ bool Found = false;
+ // Loop over the incoming operands backwards so we can easily delete as we
+ // go without invalidating the index.
+ for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
+ if (PN.getIncomingBlock(i) != ClonedParentBB)
+ continue;
+ if (!Found) {
+ Found = true;
+ continue;
+ }
+ PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
+ }
+ }
+
// Record the domtree updates for the new blocks.
SmallPtrSet<BasicBlock *, 4> SuccSet;
for (auto *ClonedBB : NewBlocks) {
@@ -1779,7 +1805,8 @@ static bool unswitchNontrivialInvariants(
UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
else
for (auto Case : SI->cases())
- UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
+ if (Case.getCaseSuccessor() != RetainedSuccBB)
+ UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
"Should not unswitch the same successor we are retaining!");
@@ -1873,19 +1900,44 @@ static bool unswitchNontrivialInvariants(
// nuke the initial terminator placed in the split block.
SplitBB->getTerminator()->eraseFromParent();
if (FullUnswitch) {
- for (BasicBlock *SuccBB : UnswitchedSuccBBs) {
+ // First we need to unhook the successor relationship as we'll be replacing
+ // the terminator with a direct branch. This is much simpler for branches
+ // than switches so we handle those first.
+ if (BI) {
// Remove the parent as a predecessor of the unswitched successor.
- SuccBB->removePredecessor(ParentBB,
- /*DontDeleteUselessPHIs*/ true);
- DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
+ assert(UnswitchedSuccBBs.size() == 1 &&
+ "Only one possible unswitched block for a branch!");
+ BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
+ UnswitchedSuccBB->removePredecessor(ParentBB,
+ /*DontDeleteUselessPHIs*/ true);
+ DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
+ } else {
+ // Note that we actually want to remove the parent block as a predecessor
+ // of *every* case successor. The case successor is either unswitched,
+ // completely eliminating an edge from the parent to that successor, or it
+ // is a duplicate edge to the retained successor as the retained successor
+ // is always the default successor and as we'll replace this with a direct
+ // branch we no longer need the duplicate entries in the PHI nodes.
+ assert(SI->getDefaultDest() == RetainedSuccBB &&
+ "Not retaining default successor!");
+ for (auto &Case : SI->cases())
+ Case.getCaseSuccessor()->removePredecessor(
+ ParentBB,
+ /*DontDeleteUselessPHIs*/ true);
+
+ // We need to use the set to populate domtree updates as even when there
+ // are multiple cases pointing at the same successor we only want to
+ // remove and insert one edge in the domtree.
+ for (BasicBlock *SuccBB : UnswitchedSuccBBs)
+ DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
}
- // Now splice the terminator from the original loop and rewrite its
- // successors.
+ // Now that we've unhooked the successor relationship, splice the terminator
+ // from the original loop to the split.
SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
+
+ // Now wire up the terminator to the preheaders.
if (BI) {
- assert(UnswitchedSuccBBs.size() == 1 &&
- "Only one possible unswitched block for a branch!");
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
BI->setSuccessor(ClonedSucc, ClonedPH);
BI->setSuccessor(1 - ClonedSucc, LoopPH);
@@ -1894,16 +1946,19 @@ static bool unswitchNontrivialInvariants(
assert(SI && "Must either be a branch or switch!");
// Walk the cases and directly update their successors.
+ SI->setDefaultDest(LoopPH);
for (auto &Case : SI->cases())
- Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
+ if (Case.getCaseSuccessor() == RetainedSuccBB)
+ Case.setSuccessor(LoopPH);
+ else
+ Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
+
// We need to use the set to populate domtree updates as even when there
// are multiple cases pointing at the same successor we only want to
- // insert one edge in the domtree.
+ // remove and insert one edge in the domtree.
for (BasicBlock *SuccBB : UnswitchedSuccBBs)
DTUpdates.push_back(
{DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
-
- SI->setDefaultDest(LoopPH);
}
// Create a new unconditional branch to the continuing block (as opposed to
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll
index bcf9fa81678..d9c143edc30 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll
@@ -1,10 +1,10 @@
; RUN: opt -passes='loop(unswitch),verify<loops>' -enable-nontrivial-unswitch -S < %s | FileCheck %s
; RUN: opt -simple-loop-unswitch -enable-nontrivial-unswitch -S < %s | FileCheck %s
-declare void @a()
-declare void @b()
-declare void @c()
-declare void @d()
+declare i32 @a()
+declare i32 @b()
+declare i32 @c()
+declare i32 @d()
declare void @sink1(i32)
declare void @sink2(i32)
@@ -29,11 +29,11 @@ loop_begin:
; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b
loop_a:
- call void @a() convergent
+ call i32 @a() convergent
br label %loop_latch
loop_b:
- call void @b()
+ call i32 @b()
br label %loop_latch
loop_latch:
@@ -61,11 +61,11 @@ loop_begin:
; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b
loop_a:
- call void @a() noduplicate
+ call i32 @a() noduplicate
br label %loop_latch
loop_b:
- call void @b()
+ call i32 @b()
br label %loop_latch
loop_latch:
@@ -96,15 +96,15 @@ loop_begin:
; CHECK-NEXT: br i1 %cond, label %loop_a, label %loop_b
loop_a:
- call void @a()
+ call i32 @a()
br label %loop_cont
loop_b:
- call void @b()
+ call i32 @b()
br label %loop_cont
loop_cont:
- invoke void @a()
+ invoke i32 @a()
to label %loop_latch unwind label %loop_catch
loop_latch:
@@ -139,7 +139,7 @@ loop_begin:
br i1 %cond1, label %loop_a, label %loop_b
loop_a:
- call void @a()
+ call i32 @a()
br label %latch
; The 'loop_a' unswitched loop.
;
@@ -150,7 +150,7 @@ loop_a:
; CHECK-NEXT: br label %loop_a.us
;
; CHECK: loop_a.us:
-; CHECK-NEXT: call void @a()
+; CHECK-NEXT: call i32 @a()
; CHECK-NEXT: br label %latch.us
;
; CHECK: latch.us:
@@ -168,7 +168,7 @@ loop_b:
; CHECK-NEXT: br i1 %cond2, label %entry.split.split.us, label %entry.split.split
loop_b_a:
- call void @b()
+ call i32 @b()
br label %latch
; The 'loop_b_a' unswitched loop.
;
@@ -182,7 +182,7 @@ loop_b_a:
; CHECK-NEXT: br label %loop_b_a.us
;
; CHECK: loop_b_a.us:
-; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call i32 @b()
; CHECK-NEXT: br label %latch.us2
;
; CHECK: latch.us2:
@@ -193,7 +193,7 @@ loop_b_a:
; CHECK-NEXT: br label %loop_exit.split
loop_b_b:
- call void @c()
+ call i32 @c()
br label %latch
; The 'loop_b_b' unswitched loop.
;
@@ -207,7 +207,7 @@ loop_b_b:
; CHECK-NEXT: br label %loop_b_b
;
; CHECK: loop_b_b:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %latch
;
; CHECK: latch:
@@ -2307,7 +2307,7 @@ loop_begin:
]
loop_a:
- call void @a()
+ call i32 @a()
br label %loop_latch
; Unswitched 'a' loop.
;
@@ -2319,14 +2319,14 @@ loop_a:
; CHECK-NEXT: br label %[[LOOP_A:.*]]
;
; CHECK: [[LOOP_A]]:
-; CHECK-NEXT: call void @a()
+; CHECK-NEXT: call i32 @a()
; CHECK-NEXT: br label %[[LOOP_LATCH_A:.*]]
;
; CHECK: [[LOOP_LATCH_A]]:
; CHECK: br label %[[LOOP_BEGIN_A]]
loop_b:
- call void @b()
+ call i32 @b()
br label %loop_latch
; Unswitched 'b' loop.
;
@@ -2338,14 +2338,14 @@ loop_b:
; CHECK-NEXT: br label %[[LOOP_B:.*]]
;
; CHECK: [[LOOP_B]]:
-; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call i32 @b()
; CHECK-NEXT: br label %[[LOOP_LATCH_B:.*]]
;
; CHECK: [[LOOP_LATCH_B]]:
; CHECK: br label %[[LOOP_BEGIN_B]]
loop_c:
- call void @c() noreturn nounwind
+ call i32 @c() noreturn nounwind
br label %loop_latch
; Unswitched 'c' loop.
;
@@ -2357,7 +2357,7 @@ loop_c:
; CHECK-NEXT: br label %[[LOOP_C:.*]]
;
; CHECK: [[LOOP_C]]:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %[[LOOP_LATCH_C:.*]]
;
; CHECK: [[LOOP_LATCH_C]]:
@@ -2615,7 +2615,7 @@ loop_a:
; CHECK-NEXT: br i1 %cond2, label %entry.split.us.split.us, label %entry.split.us.split
loop_a_a:
- call void @a()
+ call i32 @a()
br label %latch
; The 'loop_a_a' unswitched loop.
;
@@ -2629,7 +2629,7 @@ loop_a_a:
; CHECK-NEXT: br label %loop_a_a.us.us
;
; CHECK: loop_a_a.us.us:
-; CHECK-NEXT: call void @a()
+; CHECK-NEXT: call i32 @a()
; CHECK-NEXT: br label %latch.us.us
;
; CHECK: latch.us.us:
@@ -2640,7 +2640,7 @@ loop_a_a:
; CHECK-NEXT: br label %loop_exit.split
loop_a_c:
- call void @c()
+ call i32 @c()
br label %latch
; The 'loop_a_c' unswitched loop.
;
@@ -2654,7 +2654,7 @@ loop_a_c:
; CHECK-NEXT: br label %loop_a_c.us
;
; CHECK: loop_a_c.us:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %latch
;
; CHECK: latch.us:
@@ -2665,7 +2665,7 @@ loop_a_c:
; CHECK-NEXT: br label %loop_exit.split
loop_b:
- call void @b()
+ call i32 @b()
br label %latch
; The 'loop_b' unswitched loop.
;
@@ -2676,7 +2676,7 @@ loop_b:
; CHECK-NEXT: br label %loop_b
;
; CHECK: loop_b:
-; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call i32 @b()
; CHECK-NEXT: br label %latch
;
; CHECK: latch:
@@ -2710,7 +2710,7 @@ loop_begin:
br i1 %cond_or, label %loop_a, label %loop_b
loop_a:
- call void @a()
+ call i32 @a()
br label %latch
; The 'loop_a' unswitched loop.
;
@@ -2723,7 +2723,7 @@ loop_a:
; CHECK-NEXT: br label %loop_a.us
;
; CHECK: loop_a.us:
-; CHECK-NEXT: call void @a()
+; CHECK-NEXT: call i32 @a()
; CHECK-NEXT: br label %latch.us
;
; CHECK: latch.us:
@@ -2734,7 +2734,7 @@ loop_a:
; CHECK-NEXT: br label %loop_exit
loop_b:
- call void @b()
+ call i32 @b()
br label %latch
; The original loop.
;
@@ -2747,11 +2747,11 @@ loop_b:
; CHECK-NEXT: br i1 %[[OR]], label %loop_a, label %loop_b
;
; CHECK: loop_a:
-; CHECK-NEXT: call void @a()
+; CHECK-NEXT: call i32 @a()
; CHECK-NEXT: br label %latch
;
; CHECK: loop_b:
-; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call i32 @b()
; CHECK-NEXT: br label %latch
latch:
@@ -2803,7 +2803,7 @@ loop_begin:
; CHECK-NEXT: br label %loop_b.us
;
; CHECK: loop_b.us:
-; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call i32 @b()
; CHECK-NEXT: br label %latch.us
;
; CHECK: latch.us:
@@ -2828,17 +2828,17 @@ loop_begin:
; CHECK-NEXT: br i1 %[[AND3]], label %loop_a, label %loop_b
loop_a:
- call void @a()
+ call i32 @a()
br label %latch
; CHECK: loop_a:
-; CHECK-NEXT: call void @a()
+; CHECK-NEXT: call i32 @a()
; CHECK-NEXT: br label %latch
loop_b:
- call void @b()
+ call i32 @b()
br label %latch
; CHECK: loop_b:
-; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call i32 @b()
; CHECK-NEXT: br label %latch
latch:
@@ -2877,7 +2877,7 @@ loop_begin:
]
loop_a:
- call void @a()
+ call i32 @a()
br label %latch
; Unswitched 'a' loop.
;
@@ -2888,7 +2888,7 @@ loop_a:
; CHECK-NEXT: br label %[[LOOP_A:.*]]
;
; CHECK: [[LOOP_A]]:
-; CHECK-NEXT: call void @a()
+; CHECK-NEXT: call i32 @a()
; CHECK-NEXT: br label %[[LOOP_LATCH_A:.*]]
;
; CHECK: [[LOOP_LATCH_A]]:
@@ -2899,7 +2899,7 @@ loop_a:
; CHECK-NEXT: br label %loop_exit
loop_b:
- call void @b()
+ call i32 @b()
br label %latch
; Unswitched 'b' loop.
;
@@ -2910,7 +2910,7 @@ loop_b:
; CHECK-NEXT: br label %[[LOOP_B:.*]]
;
; CHECK: [[LOOP_B]]:
-; CHECK-NEXT: call void @b()
+; CHECK-NEXT: call i32 @b()
; CHECK-NEXT: br label %[[LOOP_LATCH_B:.*]]
;
; CHECK: [[LOOP_LATCH_B]]:
@@ -2921,7 +2921,7 @@ loop_b:
; CHECK-NEXT: br label %loop_exit
loop_c:
- call void @c()
+ call i32 @c()
br label %latch
; Unswitched 'c' loop.
;
@@ -2932,7 +2932,7 @@ loop_c:
; CHECK-NEXT: br label %[[LOOP_C:.*]]
;
; CHECK: [[LOOP_C]]:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %[[LOOP_LATCH_C:.*]]
;
; CHECK: [[LOOP_LATCH_C]]:
@@ -2966,6 +2966,358 @@ loop_exit:
; CHECK-NEXT: ret i32 0
}
+; A test case designed to exercise unusual properties of switches: they
+; can introduce multiple edges to successors. These need lots of special case
+; handling as they get collapsed in many cases (domtree, the unswitch itself)
+; but not in all cases (the PHI node operands).
+define i32 @test28(i32 %arg) {
+; CHECK-LABEL: @test28(
+entry:
+ br label %header
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %arg, label %[[ENTRY_SPLIT_C:.*]] [
+; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]]
+; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_A]]
+; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_B:.*]]
+; CHECK-NEXT: i32 3, label %[[ENTRY_SPLIT_C]]
+; CHECK-NEXT: ]
+
+header:
+ %tmp = call i32 @d()
+ %cmp1 = icmp eq i32 %tmp, 0
+ ; We set up a chain through all the successors of the switch that doesn't
+ ; involve the switch so that we can have interesting PHI nodes in them.
+ br i1 %cmp1, label %body.a, label %dispatch
+
+dispatch:
+ ; Switch with multiple successors. We arrange the last successor to be the
+ ; default to make the test case easier to read. This has a duplicate edge
+ ; both to the default destination (which is completely superfluous but
+ ; technically valid IR) and to a regular successor.
+ switch i32 %arg, label %body.c [
+ i32 0, label %body.a
+ i32 1, label %body.a
+ i32 2, label %body.b
+ i32 3, label %body.c
+ ]
+
+body.a:
+ %tmp.a.phi = phi i32 [ 0, %header ], [ %tmp, %dispatch ], [ %tmp, %dispatch ]
+ %tmp.a = call i32 @a()
+ %tmp.a.sum = add i32 %tmp.a.phi, %tmp.a
+ br label %body.b
+; Unswitched 'a' loop.
+;
+; CHECK: [[ENTRY_SPLIT_A]]:
+; CHECK-NEXT: br label %[[HEADER_A:.*]]
+;
+; CHECK: [[HEADER_A]]:
+; CHECK-NEXT: %[[TMP_A:.*]] = call i32 @d()
+; CHECK-NEXT: %[[CMP1_A:.*]] = icmp eq i32 %[[TMP_A]], 0
+; CHECK-NEXT: br i1 %[[CMP1_A]], label %[[BODY_A_A:.*]], label %[[DISPATCH_A:.*]]
+;
+; CHECK: [[DISPATCH_A]]:
+; CHECK-NEXT: br label %[[BODY_A_A]]
+;
+; CHECK: [[BODY_A_A]]:
+; CHECK-NEXT: %[[TMP_A_PHI_A:.*]] = phi i32 [ 0, %[[HEADER_A]] ], [ %[[TMP_A]], %[[DISPATCH_A]] ]
+; CHECK-NEXT: %[[TMP_A_A:.*]] = call i32 @a()
+; CHECK-NEXT: %[[TMP_A_SUM_A:.*]] = add i32 %[[TMP_A_PHI_A]], %[[TMP_A_A]]
+; CHECK-NEXT: br label %[[BODY_B_A:.*]]
+;
+; CHECK: [[BODY_B_A]]:
+; CHECK-NEXT: %[[TMP_B_PHI_A:.*]] = phi i32 [ %[[TMP_A_SUM_A]], %[[BODY_A_A]] ]
+; CHECK-NEXT: %[[TMP_B_A:.*]] = call i32 @b()
+; CHECK-NEXT: %[[TMP_B_SUM_A:.*]] = add i32 %[[TMP_B_PHI_A]], %[[TMP_B_A]]
+; CHECK-NEXT: br label %[[BODY_C_A:.*]]
+;
+; CHECK: [[BODY_C_A]]:
+; CHECK-NEXT: %[[TMP_C_PHI_A:.*]] = phi i32 [ %[[TMP_B_SUM_A]], %[[BODY_B_A]] ]
+; CHECK-NEXT: %[[TMP_C_A:.*]] = call i32 @c()
+; CHECK-NEXT: %[[TMP_C_SUM_A:.*]] = add i32 %[[TMP_C_PHI_A]], %[[TMP_C_A]]
+; CHECK-NEXT: br label %[[LATCH_A:.*]]
+;
+; CHECK: [[LATCH_A]]:
+; CHECK-NEXT: %[[CMP2_A:.*]] = icmp slt i32 %[[TMP_C_SUM_A]], 42
+; CHECK: br i1 %[[CMP2_A]], label %[[HEADER_A]], label %[[LOOP_EXIT_A:.*]]
+;
+; CHECK: [[LOOP_EXIT_A]]:
+; CHECK-NEXT: %[[LCSSA_A:.*]] = phi i32 [ %[[TMP_C_SUM_A]], %[[LATCH_A]] ]
+; CHECK-NEXT: br label %exit
+
+body.b:
+ %tmp.b.phi = phi i32 [ %tmp, %dispatch ], [ %tmp.a.sum, %body.a ]
+ %tmp.b = call i32 @b()
+ %tmp.b.sum = add i32 %tmp.b.phi, %tmp.b
+ br label %body.c
+; Unswitched 'b' loop.
+;
+; CHECK: [[ENTRY_SPLIT_B]]:
+; CHECK-NEXT: br label %[[HEADER_B:.*]]
+;
+; CHECK: [[HEADER_B]]:
+; CHECK-NEXT: %[[TMP_B:.*]] = call i32 @d()
+; CHECK-NEXT: %[[CMP1_B:.*]] = icmp eq i32 %[[TMP_B]], 0
+; CHECK-NEXT: br i1 %[[CMP1_B]], label %[[BODY_A_B:.*]], label %[[DISPATCH_B:.*]]
+;
+; CHECK: [[DISPATCH_B]]:
+; CHECK-NEXT: br label %[[BODY_B_B:.*]]
+;
+; CHECK: [[BODY_A_B]]:
+; CHECK-NEXT: %[[TMP_A_PHI_B:.*]] = phi i32 [ 0, %[[HEADER_B]] ]
+; CHECK-NEXT: %[[TMP_A_B:.*]] = call i32 @a()
+; CHECK-NEXT: %[[TMP_A_SUM_B:.*]] = add i32 %[[TMP_A_PHI_B]], %[[TMP_A_B]]
+; CHECK-NEXT: br label %[[BODY_B_B:.*]]
+;
+; CHECK: [[BODY_B_B]]:
+; CHECK-NEXT: %[[TMP_B_PHI_B:.*]] = phi i32 [ %[[TMP_B]], %[[DISPATCH_B]] ], [ %[[TMP_A_SUM_B]], %[[BODY_A_B]] ]
+; CHECK-NEXT: %[[TMP_B_B:.*]] = call i32 @b()
+; CHECK-NEXT: %[[TMP_B_SUM_B:.*]] = add i32 %[[TMP_B_PHI_B]], %[[TMP_B_B]]
+; CHECK-NEXT: br label %[[BODY_C_B:.*]]
+;
+; CHECK: [[BODY_C_B]]:
+; CHECK-NEXT: %[[TMP_C_PHI_B:.*]] = phi i32 [ %[[TMP_B_SUM_B]], %[[BODY_B_B]] ]
+; CHECK-NEXT: %[[TMP_C_B:.*]] = call i32 @c()
+; CHECK-NEXT: %[[TMP_C_SUM_B:.*]] = add i32 %[[TMP_C_PHI_B]], %[[TMP_C_B]]
+; CHECK-NEXT: br label %[[LATCH_B:.*]]
+;
+; CHECK: [[LATCH_B]]:
+; CHECK-NEXT: %[[CMP2_B:.*]] = icmp slt i32 %[[TMP_C_SUM_B]], 42
+; CHECK: br i1 %[[CMP2_B]], label %[[HEADER_B]], label %[[LOOP_EXIT_B:.*]]
+;
+; CHECK: [[LOOP_EXIT_B]]:
+; CHECK-NEXT: %[[LCSSA_B:.*]] = phi i32 [ %[[TMP_C_SUM_B]], %[[LATCH_B]] ]
+; CHECK-NEXT: br label %[[EXIT_SPLIT:.*]]
+
+body.c:
+ %tmp.c.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ], [ %tmp.b.sum, %body.b ]
+ %tmp.c = call i32 @c()
+ %tmp.c.sum = add i32 %tmp.c.phi, %tmp.c
+ br label %latch
+; Unswitched 'c' loop.
+;
+; CHECK: [[ENTRY_SPLIT_C]]:
+; CHECK-NEXT: br label %[[HEADER_C:.*]]
+;
+; CHECK: [[HEADER_C]]:
+; CHECK-NEXT: %[[TMP_C:.*]] = call i32 @d()
+; CHECK-NEXT: %[[CMP1_C:.*]] = icmp eq i32 %[[TMP_C]], 0
+; CHECK-NEXT: br i1 %[[CMP1_C]], label %[[BODY_A_C:.*]], label %[[DISPATCH_C:.*]]
+;
+; CHECK: [[DISPATCH_C]]:
+; CHECK-NEXT: br label %[[BODY_C_C:.*]]
+;
+; CHECK: [[BODY_A_C]]:
+; CHECK-NEXT: %[[TMP_A_PHI_C:.*]] = phi i32 [ 0, %[[HEADER_C]] ]
+; CHECK-NEXT: %[[TMP_A_C:.*]] = call i32 @a()
+; CHECK-NEXT: %[[TMP_A_SUM_C:.*]] = add i32 %[[TMP_A_PHI_C]], %[[TMP_A_C]]
+; CHECK-NEXT: br label %[[BODY_B_C:.*]]
+;
+; CHECK: [[BODY_B_C]]:
+; CHECK-NEXT: %[[TMP_B_PHI_C:.*]] = phi i32 [ %[[TMP_A_SUM_C]], %[[BODY_A_C]] ]
+; CHECK-NEXT: %[[TMP_B_C:.*]] = call i32 @b()
+; CHECK-NEXT: %[[TMP_B_SUM_C:.*]] = add i32 %[[TMP_B_PHI_C]], %[[TMP_B_C]]
+; CHECK-NEXT: br label %[[BODY_C_C:.*]]
+;
+; CHECK: [[BODY_C_C]]:
+; CHECK-NEXT: %[[TMP_C_PHI_C:.*]] = phi i32 [ %[[TMP_C]], %[[DISPATCH_C]] ], [ %[[TMP_B_SUM_C]], %[[BODY_B_C]] ]
+; CHECK-NEXT: %[[TMP_C_C:.*]] = call i32 @c()
+; CHECK-NEXT: %[[TMP_C_SUM_C:.*]] = add i32 %[[TMP_C_PHI_C]], %[[TMP_C_C]]
+; CHECK-NEXT: br label %[[LATCH_C:.*]]
+;
+; CHECK: [[LATCH_C]]:
+; CHECK-NEXT: %[[CMP2_C:.*]] = icmp slt i32 %[[TMP_C_SUM_C]], 42
+; CHECK: br i1 %[[CMP2_C]], label %[[HEADER_C]], label %[[LOOP_EXIT_C:.*]]
+;
+; CHECK: [[LOOP_EXIT_C]]:
+; CHECK-NEXT: %[[LCSSA_C:.*]] = phi i32 [ %[[TMP_C_SUM_C]], %[[LATCH_C]] ]
+; CHECK-NEXT: br label %[[EXIT_SPLIT]]
+
+latch:
+ %cmp2 = icmp slt i32 %tmp.c.sum, 42
+ br i1 %cmp2, label %header, label %exit
+
+exit:
+ %lcssa.phi = phi i32 [ %tmp.c.sum, %latch ]
+ ret i32 %lcssa.phi
+; CHECK: [[EXIT_SPLIT]]:
+; CHECK-NEXT: %[[EXIT_PHI1:.*]] = phi i32 [ %[[LCSSA_C]], %[[LOOP_EXIT_C]] ], [ %[[LCSSA_B]], %[[LOOP_EXIT_B]] ]
+; CHECK-NEXT: br label %exit
+
+; CHECK: exit:
+; CHECK-NEXT: %[[EXIT_PHI2:.*]] = phi i32 [ %[[EXIT_PHI1]], %[[EXIT_SPLIT]] ], [ %[[LCSSA_A]], %[[LOOP_EXIT_A]] ]
+; CHECK-NEXT: ret i32 %[[EXIT_PHI2]]
+}
+
+; Similar to @test28 but designed to have one of the duplicate edges be
+; a loop exit edge as those can in some cases be special. Among other things,
+; this includes an LCSSA phi with multiple entries despite being a dedicated
+; exit block.
+define i32 @test29(i32 %arg) {
+; CHECK-LABEL: define i32 @test29(
+entry:
+ br label %header
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 %arg, label %[[ENTRY_SPLIT_EXIT:.*]] [
+; CHECK-NEXT: i32 -1, label %[[ENTRY_SPLIT_EXIT]]
+; CHECK-NEXT: i32 0, label %[[ENTRY_SPLIT_A:.*]]
+; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_B:.*]]
+; CHECK-NEXT: i32 2, label %[[ENTRY_SPLIT_B]]
+; CHECK-NEXT: ]
+
+header:
+ %tmp = call i32 @d()
+ %cmp1 = icmp eq i32 %tmp, 0
+ br i1 %cmp1, label %body.a, label %dispatch
+
+dispatch:
+ switch i32 %arg, label %loop.exit1 [
+ i32 -1, label %loop.exit1
+ i32 0, label %body.a
+ i32 1, label %body.b
+ i32 2, label %body.b
+ ]
+
+body.a:
+ %tmp.a.phi = phi i32 [ 0, %header ], [ %tmp, %dispatch ]
+ %tmp.a = call i32 @a()
+ %tmp.a.sum = add i32 %tmp.a.phi, %tmp.a
+ br label %body.b
+; Unswitched 'a' loop.
+;
+; CHECK: [[ENTRY_SPLIT_A]]:
+; CHECK-NEXT: br label %[[HEADER_A:.*]]
+;
+; CHECK: [[HEADER_A]]:
+; CHECK-NEXT: %[[TMP_A:.*]] = call i32 @d()
+; CHECK-NEXT: %[[CMP1_A:.*]] = icmp eq i32 %[[TMP_A]], 0
+; CHECK-NEXT: br i1 %[[CMP1_A]], label %[[BODY_A_A:.*]], label %[[DISPATCH_A:.*]]
+;
+; CHECK: [[DISPATCH_A]]:
+; CHECK-NEXT: br label %[[BODY_A_A]]
+;
+; CHECK: [[BODY_A_A]]:
+; CHECK-NEXT: %[[TMP_A_PHI_A:.*]] = phi i32 [ 0, %[[HEADER_A]] ], [ %[[TMP_A]], %[[DISPATCH_A]] ]
+; CHECK-NEXT: %[[TMP_A_A:.*]] = call i32 @a()
+; CHECK-NEXT: %[[TMP_A_SUM_A:.*]] = add i32 %[[TMP_A_PHI_A]], %[[TMP_A_A]]
+; CHECK-NEXT: br label %[[BODY_B_A:.*]]
+;
+; CHECK: [[BODY_B_A]]:
+; CHECK-NEXT: %[[TMP_B_PHI_A:.*]] = phi i32 [ %[[TMP_A_SUM_A]], %[[BODY_A_A]] ]
+; CHECK-NEXT: %[[TMP_B_A:.*]] = call i32 @b()
+; CHECK-NEXT: %[[TMP_B_SUM_A:.*]] = add i32 %[[TMP_B_PHI_A]], %[[TMP_B_A]]
+; CHECK-NEXT: br label %[[LATCH_A:.*]]
+;
+; CHECK: [[LATCH_A]]:
+; CHECK-NEXT: %[[CMP2_A:.*]] = icmp slt i32 %[[TMP_B_SUM_A]], 42
+; CHECK: br i1 %[[CMP2_A]], label %[[HEADER_A]], label %[[LOOP_EXIT_A:.*]]
+;
+; CHECK: [[LOOP_EXIT_A]]:
+; CHECK-NEXT: %[[LCSSA_A:.*]] = phi i32 [ %[[TMP_B_SUM_A]], %[[LATCH_A]] ]
+; CHECK-NEXT: br label %loop.exit2
+
+body.b:
+ %tmp.b.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ], [ %tmp.a.sum, %body.a ]
+ %tmp.b = call i32 @b()
+ %tmp.b.sum = add i32 %tmp.b.phi, %tmp.b
+ br label %latch
+; Unswitched 'b' loop.
+;
+; CHECK: [[ENTRY_SPLIT_B]]:
+; CHECK-NEXT: br label %[[HEADER_B:.*]]
+;
+; CHECK: [[HEADER_B]]:
+; CHECK-NEXT: %[[TMP_B:.*]] = call i32 @d()
+; CHECK-NEXT: %[[CMP1_B:.*]] = icmp eq i32 %[[TMP_B]], 0
+; CHECK-NEXT: br i1 %[[CMP1_B]], label %[[BODY_A_B:.*]], label %[[DISPATCH_B:.*]]
+;
+; CHECK: [[DISPATCH_B]]:
+; CHECK-NEXT: br label %[[BODY_B_B]]
+;
+; CHECK: [[BODY_A_B]]:
+; CHECK-NEXT: %[[TMP_A_PHI_B:.*]] = phi i32 [ 0, %[[HEADER_B]] ]
+; CHECK-NEXT: %[[TMP_A_B:.*]] = call i32 @a()
+; CHECK-NEXT: %[[TMP_A_SUM_B:.*]] = add i32 %[[TMP_A_PHI_B]], %[[TMP_A_B]]
+; CHECK-NEXT: br label %[[BODY_B_B:.*]]
+;
+; CHECK: [[BODY_B_B]]:
+; CHECK-NEXT: %[[TMP_B_PHI_B:.*]] = phi i32 [ %[[TMP_B]], %[[DISPATCH_B]] ], [ %[[TMP_A_SUM_B]], %[[BODY_A_B]] ]
+; CHECK-NEXT: %[[TMP_B_B:.*]] = call i32 @b()
+; CHECK-NEXT: %[[TMP_B_SUM_B:.*]] = add i32 %[[TMP_B_PHI_B]], %[[TMP_B_B]]
+; CHECK-NEXT: br label %[[LATCH_B:.*]]
+;
+; CHECK: [[LATCH_B]]:
+; CHECK-NEXT: %[[CMP2_B:.*]] = icmp slt i32 %[[TMP_B_SUM_B]], 42
+; CHECK: br i1 %[[CMP2_B]], label %[[HEADER_B]], label %[[LOOP_EXIT_B:.*]]
+;
+; CHECK: [[LOOP_EXIT_B]]:
+; CHECK-NEXT: %[[LCSSA_B:.*]] = phi i32 [ %[[TMP_B_SUM_B]], %[[LATCH_B]] ]
+; CHECK-NEXT: br label %[[LOOP_EXIT2_SPLIT:.*]]
+
+latch:
+ %cmp2 = icmp slt i32 %tmp.b.sum, 42
+ br i1 %cmp2, label %header, label %loop.exit2
+
+loop.exit1:
+ %l1.phi = phi i32 [ %tmp, %dispatch ], [ %tmp, %dispatch ]
+ br label %exit
+; Unswitched 'exit' loop.
+;
+; CHECK: [[ENTRY_SPLIT_EXIT]]:
+; CHECK-NEXT: br label %[[HEADER_EXIT:.*]]
+;
+; CHECK: [[HEADER_EXIT]]:
+; CHECK-NEXT: %[[TMP_EXIT:.*]] = call i32 @d()
+; CHECK-NEXT: %[[CMP1_EXIT:.*]] = icmp eq i32 %[[TMP_EXIT]], 0
+; CHECK-NEXT: br i1 %[[CMP1_EXIT]], label %[[BODY_A_EXIT:.*]], label %[[DISPATCH_EXIT:.*]]
+;
+; CHECK: [[DISPATCH_EXIT]]:
+; CHECK-NEXT: %[[TMP_LCSSA:.*]] = phi i32 [ %[[TMP_EXIT]], %[[HEADER_EXIT]] ]
+; CHECK-NEXT: br label %loop.exit1
+;
+; CHECK: [[BODY_A_EXIT]]:
+; CHECK-NEXT: %[[TMP_A_PHI_EXIT:.*]] = phi i32 [ 0, %[[HEADER_EXIT]] ]
+; CHECK-NEXT: %[[TMP_A_EXIT:.*]] = call i32 @a()
+; CHECK-NEXT: %[[TMP_A_SUM_EXIT:.*]] = add i32 %[[TMP_A_PHI_EXIT]], %[[TMP_A_EXIT]]
+; CHECK-NEXT: br label %[[BODY_B_EXIT:.*]]
+;
+; CHECK: [[BODY_B_EXIT]]:
+; CHECK-NEXT: %[[TMP_B_PHI_EXIT:.*]] = phi i32 [ %[[TMP_A_SUM_EXIT]], %[[BODY_A_EXIT]] ]
+; CHECK-NEXT: %[[TMP_B_EXIT:.*]] = call i32 @b()
+; CHECK-NEXT: %[[TMP_B_SUM_EXIT:.*]] = add i32 %[[TMP_B_PHI_EXIT]], %[[TMP_B_EXIT]]
+; CHECK-NEXT: br label %[[LATCH_EXIT:.*]]
+;
+; CHECK: [[LATCH_EXIT]]:
+; CHECK-NEXT: %[[CMP2_EXIT:.*]] = icmp slt i32 %[[TMP_B_SUM_EXIT]], 42
+; CHECK: br i1 %[[CMP2_EXIT]], label %[[HEADER_EXIT]], label %[[LOOP_EXIT_EXIT:.*]]
+;
+; CHECK: loop.exit1:
+; CHECK-NEXT: %[[L1_PHI:.*]] = phi i32 [ %[[TMP_LCSSA]], %[[DISPATCH_EXIT]] ]
+; CHECK-NEXT: br label %exit
+;
+; CHECK: [[LOOP_EXIT_EXIT]]:
+; CHECK-NEXT: %[[L2_PHI:.*]] = phi i32 [ %[[TMP_B_SUM_EXIT]], %[[LATCH_EXIT]] ]
+; CHECK-NEXT: br label %[[LOOP_EXIT2_SPLIT]]
+
+loop.exit2:
+ %l2.phi = phi i32 [ %tmp.b.sum, %latch ]
+ br label %exit
+; CHECK: [[LOOP_EXIT2_SPLIT]]:
+; CHECK-NEXT: %[[LOOP_EXIT_PHI1:.*]] = phi i32 [ %[[L2_PHI]], %[[LOOP_EXIT_EXIT]] ], [ %[[LCSSA_B]], %[[LOOP_EXIT_B]] ]
+; CHECK-NEXT: br label %loop.exit2
+;
+; CHECK: loop.exit2:
+; CHECK-NEXT: %[[LOOP_EXIT_PHI2:.*]] = phi i32 [ %[[LOOP_EXIT_PHI1]], %[[LOOP_EXIT2_SPLIT]] ], [ %[[LCSSA_A]], %[[LOOP_EXIT_A]] ]
+; CHECK-NEXT: br label %exit
+
+exit:
+ %l.phi = phi i32 [ %l1.phi, %loop.exit1 ], [ %l2.phi, %loop.exit2 ]
+ ret i32 %l.phi
+; CHECK: exit:
+; CHECK-NEXT: %[[EXIT_PHI:.*]] = phi i32 [ %[[L1_PHI]], %loop.exit1 ], [ %[[LOOP_EXIT_PHI2]], %loop.exit2 ]
+; CHECK-NEXT: ret i32 %[[EXIT_PHI]]
+}
+
; Unswitch will not actually change the loop nest from:
; A < B < C
define void @hoist_inner_loop0() {
@@ -2991,7 +3343,7 @@ b.header:
; CHECK-NEXT: br label %[[C_HEADER_US:.*]]
;
; CHECK: [[C_HEADER_US]]:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %[[B_LATCH_SPLIT_US:.*]]
;
; CHECK: [[B_LATCH_SPLIT_US]]:
@@ -3001,10 +3353,10 @@ b.header:
; CHECK-NEXT: br label %c.header
c.header:
- call void @c()
+ call i32 @c()
br i1 %v1, label %b.latch, label %c.latch
; CHECK: c.header:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %c.latch
c.latch:
@@ -3066,7 +3418,7 @@ b.header:
; CHECK-NEXT: br label %[[C_HEADER_US:.*]]
;
; CHECK: [[C_HEADER_US]]:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %[[B_LATCH_US:.*]]
;
; CHECK: [[B_LATCH_US]]:
@@ -3077,10 +3429,10 @@ b.header:
; CHECK-NEXT: br label %c.header
c.header:
- call void @c()
+ call i32 @c()
br i1 %v1, label %b.latch, label %c.latch
; CHECK: c.header:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %c.latch
c.latch:
@@ -3154,7 +3506,7 @@ b.header:
; CHECK-NEXT: br label %[[C_HEADER_US:.*]]
;
; CHECK: [[C_HEADER_US]]:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %[[B_LATCH_US:.*]]
;
; CHECK: [[B_LATCH_US]]:
@@ -3166,10 +3518,10 @@ b.header:
; CHECK-NEXT: br label %c.header
c.header:
- call void @c()
+ call i32 @c()
br i1 %v1, label %b.latch, label %c.latch
; CHECK: c.header:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %c.latch
c.latch:
@@ -3234,7 +3586,7 @@ b.header:
; CHECK-NEXT: br label %[[C_HEADER_US:.*]]
;
; CHECK: [[C_HEADER_US]]:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %[[B_LATCH_US:.*]]
;
; CHECK: [[B_LATCH_US]]:
@@ -3246,10 +3598,10 @@ b.header:
; CHECK-NEXT: br label %c.header
c.header:
- call void @c()
+ call i32 @c()
br i1 %v1, label %b.latch, label %c.body
; CHECK: c.header:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %c.body
c.body:
@@ -3332,7 +3684,7 @@ c.header:
; CHECK-NEXT: br label %[[D_HEADER_US:.*]]
;
; CHECK: [[D_HEADER_US]]:
-; CHECK-NEXT: call void @d()
+; CHECK-NEXT: call i32 @d()
; CHECK-NEXT: br label %[[C_LATCH_US:.*]]
;
; CHECK: [[C_LATCH_US]]:
@@ -3342,10 +3694,10 @@ c.header:
; CHECK-NEXT: br label %d.header
d.header:
- call void @d()
+ call i32 @d()
br i1 %v1, label %c.latch, label %d.exiting1
; CHECK: d.header:
-; CHECK-NEXT: call void @d()
+; CHECK-NEXT: call i32 @d()
; CHECK-NEXT: br label %d.exiting1
d.exiting1:
@@ -3445,7 +3797,7 @@ c.header:
; CHECK-NEXT: br label %[[D_HEADER_US:.*]]
;
; CHECK: [[D_HEADER_US]]:
-; CHECK-NEXT: call void @d()
+; CHECK-NEXT: call i32 @d()
; CHECK-NEXT: br label %[[C_LATCH_US:.*]]
;
; CHECK: [[C_LATCH_US]]:
@@ -3457,10 +3809,10 @@ c.header:
; CHECK-NEXT: br label %d.header
d.header:
- call void @d()
+ call i32 @d()
br i1 %v1, label %c.latch, label %d.latch
; CHECK: d.header:
-; CHECK-NEXT: call void @d()
+; CHECK-NEXT: call i32 @d()
; CHECK-NEXT: br label %d.latch
d.latch:
@@ -3531,7 +3883,7 @@ b.header:
; CHECK-NEXT: br label %[[C_HEADER_US:.*]]
;
; CHECK: [[C_HEADER_US]]:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %[[B_LATCH_US:.*]]
;
; CHECK: [[B_LATCH_US]]:
@@ -3543,14 +3895,14 @@ b.header:
; CHECK-NEXT: br label %c.header
c.header:
- call void @c()
+ call i32 @c()
switch i32 %v1, label %c.latch [
i32 1, label %b.latch
i32 2, label %b.latch
i32 3, label %b.latch
]
; CHECK: c.header:
-; CHECK-NEXT: call void @c()
+; CHECK-NEXT: call i32 @c()
; CHECK-NEXT: br label %c.latch
c.latch:
OpenPOWER on IntegriCloud