summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp26
-rw-r--r--llvm/test/CodeGen/X86/block-placement.ll102
2 files changed, 119 insertions, 9 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index e4ddb043505..ae0f5bd0bcc 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -68,6 +68,13 @@ ExitBlockBias("block-placement-exit-block-bias",
"over the original exit to be considered the new exit."),
cl::init(0), cl::Hidden);
+static cl::opt<bool> PlaceLastSuccessor(
+ "place-last-successor",
+ cl::desc("When selecting a non-successor block, choose the last block to "
+ "have been a successor. This represents the block whose "
+ "predecessor was most recently placed."),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool> OutlineOptionalBranches(
"outline-optional-branches",
cl::desc("Put completely optional branches, i.e. branches with a common "
@@ -443,6 +450,25 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
const BlockFilterSet *BlockFilter) {
+ if (PlaceLastSuccessor) {
+ // If we're just placing the last successor as the best candidate, the
+ // logic is super simple. We skip the already placed entries on the
+ // worklist and return the most recently added entry that isn't placed.
+ while (!WorkList.empty()) {
+ MachineBasicBlock *SuccBB = WorkList.pop_back_val();
+ BlockChain &SuccChain = *BlockToChain.lookup(SuccBB);
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(SuccBB)
+ << " -> Already merged!\n");
+ continue;
+ }
+ assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+ return SuccBB;
+ }
+
+ return nullptr;
+ }
+
// Once we need to walk the worklist looking for a candidate, cleanup the
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll
index e0276e42d4d..49ee1b2c162 100644
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i686-linux -pre-RA-sched=source < %s | FileCheck %s
+; RUN: llc -mtriple=i686-linux -place-last-successor -pre-RA-sched=source < %s | FileCheck %s
declare void @error(i32 %i, i32 %a, i32 %b)
@@ -17,11 +17,11 @@ define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
; CHECK: %else4
; CHECK-NOT: .align
; CHECK: %exit
-; CHECK: %then1
-; CHECK: %then2
-; CHECK: %then3
-; CHECK: %then4
; CHECK: %then5
+; CHECK: %then4
+; CHECK: %then3
+; CHECK: %then2
+; CHECK: %then1
entry:
%gep1 = getelementptr i32, i32* %a, i32 1
@@ -82,9 +82,9 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
; CHECK-LABEL: test_loop_cold_blocks:
; CHECK: %entry
; CHECK-NOT: .align
-; CHECK: %unlikely1
-; CHECK-NOT: .align
; CHECK: %unlikely2
+; CHECK-NOT: .align
+; CHECK: %unlikely1
; CHECK: .align
; CHECK: %body1
; CHECK: %body2
@@ -135,9 +135,9 @@ define i32 @test_loop_early_exits(i32 %i, i32* %a) {
; CHECK: %body3
; CHECK: %body4
; CHECK: %exit
-; CHECK: %bail1
-; CHECK: %bail2
; CHECK: %bail3
+; CHECK: %bail2
+; CHECK: %bail1
entry:
br label %body1
@@ -1083,3 +1083,87 @@ exit:
%ret = phi i32 [ %val1, %then ], [ %val2, %else ]
ret i32 %ret
}
+
+define void @test_outlined() {
+; This test ends up with diamond control flow in outlined optional regions.
+; These diamonds should still be locally cohesive even when out-of-line due to
+; being cold.
+; CHECK-LABEL: test_outlined:
+; CHECK: %a1
+; CHECK: %a2
+; CHECK: %done
+; CHECK: %b2
+; CHECK: %c2
+; CHECK: %d2
+; CHECK: %f2
+; CHECK: %b1
+; CHECK: %c1
+; CHECK: %d1
+; CHECK: %f1
+
+a1:
+ %call.a1 = call i1 @a1()
+ br i1 %call.a1, label %b1, label %a2, !prof !0
+
+b1:
+ %call.b1 = call i1 @b1()
+ br i1 %call.b1, label %c1, label %d1
+
+c1:
+ call void @c1()
+ br label %f1
+
+d1:
+ call void @d1()
+ br label %f1
+
+f1:
+ call void @f1()
+ br label %a2
+
+a2:
+ %call.a2 = call i1 @a2()
+ br i1 %call.a2, label %b2, label %done, !prof !0
+
+b2:
+ %call.b2 = call i1 @b2()
+ br i1 %call.b2, label %c2, label %d2
+
+c2:
+ call void @c2()
+ br label %f2
+
+d2:
+ call void @d2()
+ br label %f2
+
+f2:
+ call void @f2()
+ br label %done
+
+done:
+ call void @done()
+ ret void
+}
+
+declare i1 @a1()
+
+declare i1 @b1()
+
+declare void @c1()
+
+declare void @d1()
+
+declare void @f1()
+
+declare i1 @a2()
+
+declare i1 @b2()
+
+declare void @c2()
+
+declare void @d2()
+
+declare void @f2()
+
+declare void @done()
OpenPOWER on IntegriCloud