author     James Molloy <james.molloy@arm.com>   2016-08-31 10:46:33 +0000
committer  James Molloy <james.molloy@arm.com>   2016-08-31 10:46:33 +0000
commit     c53b40b5096597d54c968c8ab07495225b67b97a (patch)
tree       0f308ff731972cfb99c2805f1d192fae93051cf4
parent     55bd04cd209e121e2930731685a9b252d60cb77e (diff)
[SimplifyCFG] Handle tail-sinking of more than 2 incoming branches
This was a real restriction in the original version of SinkIfThenCodeToEnd. Now that it has been rewritten, the restriction can be lifted. As part of this, we handle a very common and useful case where one of the incoming branches is actually conditional. Consider:

  if (a) x(1);
  else if (b) x(2);

This produces the following CFG:

       [if]
      /    \
  [x(1)]  [if]
     |     | \
     |     |  \
     |  [x(2)] |
      \    |  /
       [ end ]

[end] has two unconditional predecessor arcs and one conditional. The conditional refers to the implicit empty 'else' arc. This same pattern can also be caused by an empty default block in a switch.

We can't sink the call to x() down to end because no call to x() happens on the third incoming arc (assume that x() has side effects for the sake of argument; if something is safe to speculate we could indeed sink nevertheless, but this cannot happen in the general case and causes many extra selects).

We are now able to detect this case and split off the unconditional arcs to a common successor:

       [if]
      /    \
  [x(1)]  [if]
     |     | \
     |     |  \
     |  [x(2)] |
      \   /    |
  [sink.split] |
        \     /
        [ end ]

Now we can sink the call to x() into %sink.split. This can cause significant code simplification in many testcases.

llvm-svn: 280217
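For concreteness, the CFG described above corresponds to IR of roughly the following shape (a minimal hand-written sketch; the function, block, and value names are illustrative and are not taken from the patch's tests):

  declare void @x(i32)

  define void @foo(i1 %a, i1 %b) {
  entry:
    br i1 %a, label %then1, label %test.b

  then1:                                ; "x(1)"
    call void @x(i32 1)
    br label %end

  test.b:                               ; the inner [if]; its false edge is the conditional arc into %end
    br i1 %b, label %then2, label %end

  then2:                                ; "x(2)"
    call void @x(i32 2)
    br label %end

  end:
    ret void
  }

With this patch, the two unconditional predecessors %then1 and %then2 are split off into a common ".sink.split" successor of %end, so the call to x() can be sunk into it as described above.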
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp               116
-rw-r--r--  llvm/test/CodeGen/AArch64/arm64-jumptable.ll              13
-rw-r--r--  llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll      2
-rw-r--r--  llvm/test/CodeGen/AArch64/ifcvt-select.ll                  2
-rw-r--r--  llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll             24
-rw-r--r--  llvm/test/MC/ARM/data-in-code.ll                           4
-rw-r--r--  llvm/test/Transforms/SimplifyCFG/sink-common-code.ll     110
7 files changed, 222 insertions, 49 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index a0d45413bd1..a9fa7e7ae9b 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1547,20 +1547,62 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
assert(BI1->isUnconditional());
BasicBlock *BBEnd = BI1->getSuccessor(0);
- // We currently only support branch targets with two predecessors.
- // FIXME: this is an arbitrary restriction and should be lifted.
- SmallVector<BasicBlock*,4> Blocks;
- for (auto *BB : predecessors(BBEnd))
- Blocks.push_back(BB);
- if (Blocks.size() != 2 ||
- !all_of(Blocks, [](const BasicBlock *BB) {
- auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
- return BI && BI->isUnconditional();
- }))
+ // We support two situations:
+ // (1) all incoming arcs are unconditional
+ // (2) one incoming arc is conditional
+ //
+ // (2) is very common in switch defaults and
+ // else-if patterns;
+ //
+ // if (a) f(1);
+ // else if (b) f(2);
+ //
+ // produces:
+ //
+ //        [if]
+ //       /    \
+ //   [f(1)]  [if]
+ //      |     | \
+ //      |     |  \
+ //      |  [f(2)] |
+ //       \    |  /
+ //        [ end ]
+ //
+ // [end] has two unconditional predecessor arcs and one conditional. The
+ // conditional refers to the implicit empty 'else' arc. This conditional
+ // arc can also be caused by an empty default block in a switch.
+ //
+ // In this case, we attempt to sink code from all *unconditional* arcs.
+ // If we can sink instructions from these arcs (determined during the scan
+ // phase below) we insert a common successor for all unconditional arcs and
+ // connect that to [end], to enable sinking:
+ //
+ //        [if]
+ //       /    \
+ //   [x(1)]  [if]
+ //      |     | \
+ //      |     |  \
+ //      |  [x(2)] |
+ //       \   /    |
+ //   [sink.split] |
+ //         \     /
+ //         [ end ]
+ //
+ SmallVector<BasicBlock*,4> UnconditionalPreds;
+ Instruction *Cond = nullptr;
+ for (auto *B : predecessors(BBEnd)) {
+ auto *T = B->getTerminator();
+ if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional())
+ UnconditionalPreds.push_back(B);
+ else if ((isa<BranchInst>(T) || isa<SwitchInst>(T)) && !Cond)
+ Cond = T;
+ else
+ return false;
+ }
+ if (UnconditionalPreds.size() < 2)
return false;
-
+
bool Changed = false;
-
// We take a two-step approach to tail sinking. First we scan from the end of
// each block upwards in lockstep. If the n'th instruction from the end of each
// block can be sunk, those instructions are added to ValuesToSink and we
@@ -1570,7 +1612,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
unsigned ScanIdx = 0;
SmallPtrSet<Value*,4> InstructionsToSink;
DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
- LockstepReverseIterator LRI(Blocks);
+ LockstepReverseIterator LRI(UnconditionalPreds);
while (LRI.isValid() &&
canSinkInstructions(*LRI, PHIOperands)) {
DEBUG(dbgs() << "SINK: instruction can be sunk: " << (*LRI)[0] << "\n");
@@ -1579,6 +1621,35 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
--LRI;
}
+ auto ProfitableToSinkLastInstruction = [&]() {
+ LRI.reset();
+ unsigned NumPHIdValues = 0;
+ for (auto *I : *LRI)
+ for (auto *V : PHIOperands[I])
+ if (InstructionsToSink.count(V) == 0)
+ ++NumPHIdValues;
+ DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
+ assert((NumPHIdValues % UnconditionalPreds.size() == 0) &&
+ "Every operand must either be PHId or not PHId!");
+
+ return NumPHIdValues / UnconditionalPreds.size() <= 1;
+ };
+
+ if (ScanIdx > 0 && Cond) {
+ // Check if we would actually sink anything first!
+ if (!ProfitableToSinkLastInstruction())
+ return false;
+
+ DEBUG(dbgs() << "SINK: Splitting edge\n");
+ // We have a conditional edge and we're going to sink some instructions.
+ // Insert a new block postdominating all blocks we're going to sink from.
+ if (!SplitBlockPredecessors(BI1->getSuccessor(0), UnconditionalPreds,
+ ".sink.split"))
+ // Edges couldn't be split.
+ return false;
+ Changed = true;
+ }
+
// Now that we've analyzed all potential sinking candidates, perform the
// actual sink. We iteratively sink the last non-terminator of the source
// blocks into their common successor unless doing so would require too
@@ -1593,29 +1664,20 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// This is unlikely in practice though.
for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) {
DEBUG(dbgs() << "SINK: Sink: "
- << *Blocks[0]->getTerminator()->getPrevNode()
+ << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
<< "\n");
// Because we've sunk every instruction in turn, the current instruction to
// sink is always at index 0.
- LRI.reset();
- unsigned NumPHIdValues = 0;
- for (auto *I : *LRI)
- for (auto *V : PHIOperands[I])
- if (InstructionsToSink.count(V) == 0)
- ++NumPHIdValues;
- DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
- assert((NumPHIdValues % Blocks.size() == 0) &&
- "Every operand must either be PHId or not PHId!");
-
- if (NumPHIdValues / Blocks.size() > 1)
+ if (!ProfitableToSinkLastInstruction()) {
// Too many PHIs would be created.
+ DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
break;
+ }
- sinkLastInstruction(Blocks);
+ sinkLastInstruction(UnconditionalPreds);
NumSinkCommons++;
Changed = true;
}
-
return Changed;
}
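Illustrative only: based on the CHECK lines added to test17 in sink-common-code.ll below, the switch-with-empty-default case is expected to be rewritten roughly as follows after this patch (block and value names such as %if.end.sink.split, %cmp.sink, and %frombool.sink are approximate; the actual names are chosen by SplitBlockPredecessors and the sinking code). Only one instruction (the zext) is sunk, and only one value (%cmp vs. %cmp2) needs a PHI, so the ProfitableToSinkLastInstruction heuristic above accepts it:

  define zeroext i1 @test17(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
  entry:
    switch i32 %flag, label %if.end [
      i32 0, label %if.then
      i32 1, label %if.then2
    ]

  if.then:
    %cmp = icmp uge i32 %blksA, %nblks
    br label %if.end.sink.split

  if.then2:
    %add = add i32 %nblks, %blksB
    %cmp2 = icmp ule i32 %add, %blksA
    br label %if.end.sink.split

  if.end.sink.split:                    ; new common successor for the two unconditional arcs
    %cmp.sink = phi i1 [ %cmp, %if.then ], [ %cmp2, %if.then2 ]
    %frombool.sink = zext i1 %cmp.sink to i8
    br label %if.end

  if.end:
    %obeys.0 = phi i8 [ %frombool.sink, %if.end.sink.split ], [ 0, %entry ]
    %tobool4 = icmp ne i8 %obeys.0, 0
    ret i1 %tobool4
  }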
diff --git a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
index 4635cfe5858..c7f213fa846 100644
--- a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
@@ -2,25 +2,26 @@
; RUN: llc -mtriple=arm64-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-LINUX
; <rdar://11417675>
-define void @sum(i32* %to) {
+define void @sum(i32 %a, i32* %to, i32 %c) {
entry:
- switch i32 undef, label %exit [
+ switch i32 %a, label %exit [
i32 1, label %bb1
i32 2, label %bb2
i32 3, label %bb3
i32 4, label %bb4
]
bb1:
- store i32 undef, i32* %to
+ %b = add i32 %c, 1
+ store i32 %b, i32* %to
br label %exit
bb2:
- store i32 undef, i32* %to
+ store i32 2, i32* %to
br label %exit
bb3:
- store i32 undef, i32* %to
+ store i32 3, i32* %to
br label %exit
bb4:
- store i32 undef, i32* %to
+ store i32 4, i32* %to
br label %exit
exit:
ret void
diff --git a/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll b/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
index 12e6f2d4889..3ecb1d49ee1 100644
--- a/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
+++ b/llvm/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
@@ -3,7 +3,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; Function Attrs: norecurse nounwind
define void @foo(i32 %a, i32 %b, float* nocapture %foo_arr) #0 {
-; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}}), (load 4 from %ir.arrayidx1.{{i[1-2]}})
+; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}})
entry:
%cmp = icmp sgt i32 %a, 0
br i1 %cmp, label %if.then, label %if.end
diff --git a/llvm/test/CodeGen/AArch64/ifcvt-select.ll b/llvm/test/CodeGen/AArch64/ifcvt-select.ll
index 4e024d963f2..b4815c4fb3e 100644
--- a/llvm/test/CodeGen/AArch64/ifcvt-select.ll
+++ b/llvm/test/CodeGen/AArch64/ifcvt-select.ll
@@ -4,7 +4,7 @@
define i32 @foo(i32 %a, i32 %b) {
entry:
;CHECK-LABEL: foo:
-;CHECK: csinc
+;CHECK: cneg
;CHECK-NOT: csel
%sub = sub nsw i32 %b, %a
%cmp10 = icmp sgt i32 %a, 0
diff --git a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
index f66af7fd627..22d0584f63b 100644
--- a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
+++ b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
@@ -11,9 +11,9 @@
define void @test_i16_2cmp_signed_1() {
; CHECK-LABEL: test_i16_2cmp_signed_1
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.gt
+; CHECK-NEXT: b.lt
; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
entry:
%0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
%1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16, %struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
@@ -39,7 +39,7 @@ if.end8: ; preds = %if.else, %if.then7,
define void @test_i16_2cmp_signed_2() {
; CHECK-LABEL: test_i16_2cmp_signed_2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.le
+; CHECK-NEXT: b.gt
; CHECK-NOT: cmp
; CHECK: b.ge
entry:
@@ -67,9 +67,9 @@ if.end8: ; preds = %if.else, %if.then7,
define void @test_i16_2cmp_unsigned_1() {
; CHECK-LABEL: test_i16_2cmp_unsigned_1
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.hi
+; CHECK-NEXT: b.lo
; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
entry:
%0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
%1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16, %struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
@@ -95,7 +95,7 @@ if.end8: ; preds = %if.else, %if.then7,
define void @test_i16_2cmp_unsigned_2() {
; CHECK-LABEL: test_i16_2cmp_unsigned_2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.ls
+; CHECK-NEXT: b.hi
; CHECK-NOT: cmp
; CHECK: b.hs
entry:
@@ -132,9 +132,9 @@ if.end8: ; preds = %if.else, %if.then7,
define void @test_i8_2cmp_signed_1() {
; CHECK-LABEL: test_i8_2cmp_signed_1
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.gt
+; CHECK-NEXT: b.lt
; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
entry:
%0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
%1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8, %struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
@@ -160,7 +160,7 @@ if.end8: ; preds = %if.else, %if.then7,
define void @test_i8_2cmp_signed_2() {
; CHECK-LABEL: test_i8_2cmp_signed_2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.le
+; CHECK-NEXT: b.gt
; CHECK-NOT: cmp
; CHECK: b.ge
entry:
@@ -188,9 +188,9 @@ if.end8: ; preds = %if.else, %if.then7,
define void @test_i8_2cmp_unsigned_1() {
; CHECK-LABEL: test_i8_2cmp_unsigned_1
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.hi
+; CHECK-NEXT: b.lo
; CHECK-NOT: cmp
-; CHECK: b.ne
+; CHECK: ret
entry:
%0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
%1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8, %struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
@@ -216,7 +216,7 @@ if.end8: ; preds = %if.else, %if.then7,
define void @test_i8_2cmp_unsigned_2() {
; CHECK-LABEL: test_i8_2cmp_unsigned_2
; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK-NEXT: b.ls
+; CHECK-NEXT: b.hi
; CHECK-NOT: cmp
; CHECK: b.hs
entry:
diff --git a/llvm/test/MC/ARM/data-in-code.ll b/llvm/test/MC/ARM/data-in-code.ll
index 10657a3fed3..c2194e9179c 100644
--- a/llvm/test/MC/ARM/data-in-code.ll
+++ b/llvm/test/MC/ARM/data-in-code.ll
@@ -9,7 +9,7 @@
;; Ensure that if a jump table is generated that it has Mapping Symbols
;; marking the data-in-code region.
-define void @foo(i32* %ptr) nounwind ssp {
+define void @foo(i32* %ptr, i32 %b) nounwind ssp {
%tmp = load i32, i32* %ptr, align 4
switch i32 %tmp, label %exit [
i32 0, label %bb0
@@ -18,7 +18,7 @@ define void @foo(i32* %ptr) nounwind ssp {
i32 3, label %bb3
]
bb0:
- store i32 0, i32* %ptr, align 4
+ store i32 %b, i32* %ptr, align 4
br label %exit
bb1:
store i32 1, i32* %ptr, align 4
diff --git a/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll b/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll
index 293b7f37c6e..1a4884a6454 100644
--- a/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll
+++ b/llvm/test/Transforms/SimplifyCFG/sink-common-code.ll
@@ -427,6 +427,116 @@ if.end:
; CHECK: load
; CHECK-NOT: load
+define zeroext i1 @test16(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.else:
+ br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+; CHECK-LABEL: test16
+; CHECK: zext
+; CHECK-NOT: zext
+
+define zeroext i1 @test17(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ switch i32 %flag, label %if.end [
+ i32 0, label %if.then
+ i32 1, label %if.then2
+ ]
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %entry ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+; CHECK-LABEL: test17
+; CHECK: if.then:
+; CHECK-NEXT: icmp uge
+; CHECK-NEXT: br label %[[x:.*]]
+
+; CHECK: if.then2:
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp ule
+; CHECK-NEXT: br label %[[x]]
+
+; CHECK: [[x]]:
+; CHECK-NEXT: %[[y:.*]] = phi i1 [ %cmp
+; CHECK-NEXT: %[[z:.*]] = zext i1 %[[y]]
+; CHECK-NEXT: br label %if.end
+
+; CHECK: if.end:
+; CHECK-NEXT: phi i8
+; CHECK-DAG: [ %[[z]], %[[x]] ]
+; CHECK-DAG: [ 0, %entry ]
+
+define zeroext i1 @test18(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ switch i32 %flag, label %if.then3 [
+ i32 0, label %if.then
+ i32 1, label %if.then2
+ ]
+
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.then2:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.then3:
+ %add2 = add i32 %nblks, %blksA
+ %cmp3 = icmp ule i32 %add2, %blksA
+ %frombool4 = zext i1 %cmp3 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ %frombool4, %if.then3 ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+; CHECK-LABEL: test18
+; CHECK: if.end:
+; CHECK-NEXT: %[[x:.*]] = phi i1
+; CHECK-DAG: [ %cmp, %if.then ]
+; CHECK-DAG: [ %cmp2, %if.then2 ]
+; CHECK-DAG: [ %cmp3, %if.then3 ]
+; CHECK-NEXT: zext i1 %[[x]] to i8
+
; CHECK: !0 = !{!1, !1, i64 0}
; CHECK: !1 = !{!"float", !2}
; CHECK: !2 = !{!"an example type tree"}