summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp29
-rw-r--r--llvm/test/CodeGen/X86/block-placement.ll53
-rw-r--r--llvm/test/CodeGen/X86/code_placement_cold_loop_blocks.ll5
3 files changed, 83 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 2d4b95974cc..c8f8dda0c2d 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1944,6 +1944,35 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
if (ExitIt == LoopChain.end())
return;
+ // Rotating a loop exit to the bottom when there is a fallthrough to top
+ // trades the entry fallthrough for an exit fallthrough.
+ // If there is no bottom->top edge, but the chosen exit block does have
+ // a fallthrough, we break that fallthrough for nothing in return.
+
+ // Let's consider an example. We have a built chain of basic blocks
+ // B1, B2, ..., Bn, where Bk is a ExitingBB - chosen exit block.
+ // By doing a rotation we get
+ // Bk+1, ..., Bn, B1, ..., Bk
+ // Break of fallthrough to B1 is compensated by a fallthrough from Bk.
+ // If we had a fallthrough Bk -> Bk+1 it is broken now.
+ // It might be compensated by fallthrough Bn -> B1.
+ // So we have a condition to avoid creation of extra branch by loop rotation.
+ // All below must be true to avoid loop rotation:
+ // If there is a fallthrough to top (B1)
+ // There was fallthrough from chosen exit block (Bk) to next one (Bk+1)
+ // There is no fallthrough from bottom (Bn) to top (B1).
+ // Please note that there is no exit fallthrough from Bn because we checked it
+ // above.
+ if (ViableTopFallthrough) {
+ MachineBasicBlock *NextBlockInChain = *std::next(ExitIt);
+ MachineBasicBlock *Bottom = *std::prev(LoopChain.end());
+ if (NextBlockInChain->isPredecessor(ExitingBB))
+ if (!Top->isPredecessor(Bottom))
+ return;
+ }
+
+ DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB)
+ << " at bottom\n");
std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end());
}
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll
index c7de65d8450..1975b036eb2 100644
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -354,6 +354,7 @@ define void @unnatural_cfg2() {
; single-source GCC.
; CHECK-LABEL: unnatural_cfg2
; CHECK: %entry
+; CHECK: %loop.header
; CHECK: %loop.body1
; CHECK: %loop.body2
; CHECK: %loop.body4
@@ -361,7 +362,6 @@ define void @unnatural_cfg2() {
; CHECK: %loop.inner2.begin
; CHECK: %loop.body3
; CHECK: %loop.inner1.begin
-; CHECK: %loop.header
; CHECK: %bail
entry:
@@ -1491,6 +1491,54 @@ ret: ; preds = %endif, %then
ret void
}
+define i32 @not_rotate_if_extra_branch(i32 %count) {
+; Test checks that there is no loop rotation
+; if it introduces extra branch.
+; Specifically in this case because best exit is .header
+; but it has fallthrough to .middle block and last block in
+; loop chain .slow does not have afallthrough to .header.
+; CHECK-LABEL: not_rotate_if_extra_branch
+; CHECK: %.entry
+; CHECK: %.header
+; CHECK: %.middle
+; CHECK: %.backedge
+; CHECK: %.slow
+; CHECK: %.bailout
+; CHECK: %.stop
+.entry:
+ %sum.0 = shl nsw i32 %count, 1
+ br label %.header
+
+.header:
+ %i = phi i32 [ %i.1, %.backedge ], [ 0, %.entry ]
+ %sum = phi i32 [ %sum.1, %.backedge ], [ %sum.0, %.entry ]
+ %is_exc = icmp sgt i32 %i, 9000000
+ br i1 %is_exc, label %.bailout, label %.middle, !prof !13
+
+.bailout:
+ %sum.2 = add nsw i32 %count, 1
+ br label %.stop
+
+.middle:
+ %pr.1 = and i32 %i, 1023
+ %pr.2 = icmp eq i32 %pr.1, 0
+ br i1 %pr.2, label %.slow, label %.backedge, !prof !14
+
+.slow:
+ tail call void @effect(i32 %sum)
+ br label %.backedge
+
+.backedge:
+ %sum.1 = add nsw i32 %i, %sum
+ %i.1 = add nsw i32 %i, 1
+ %end = icmp slt i32 %i.1, %count
+ br i1 %end, label %.header, label %.stop, !prof !15
+
+.stop:
+ %sum.phi = phi i32 [ %sum.1, %.backedge ], [ %sum.2, %.bailout ]
+ ret i32 %sum.phi
+}
+
declare void @effect(i32)
!5 = !{!"branch_weights", i32 84, i32 16}
@@ -1501,3 +1549,6 @@ declare void @effect(i32)
!10 = !{!"branch_weights", i32 90, i32 10}
!11 = !{!"branch_weights", i32 1, i32 1}
!12 = !{!"branch_weights", i32 5, i32 3}
+!13 = !{!"branch_weights", i32 1, i32 1}
+!14 = !{!"branch_weights", i32 1, i32 1023}
+!15 = !{!"branch_weights", i32 4095, i32 1}
diff --git a/llvm/test/CodeGen/X86/code_placement_cold_loop_blocks.ll b/llvm/test/CodeGen/X86/code_placement_cold_loop_blocks.ll
index d7dc8defac3..875d791dc80 100644
--- a/llvm/test/CodeGen/X86/code_placement_cold_loop_blocks.ll
+++ b/llvm/test/CodeGen/X86/code_placement_cold_loop_blocks.ll
@@ -37,7 +37,7 @@ end:
ret void
}
-define void @nested_loop_0() !prof !1 {
+define void @nested_loop_0(i1 %flag) !prof !1 {
; Test if a block that is cold in the inner loop but not cold in the outer loop
; will merged to the outer loop chain.
;
@@ -68,8 +68,7 @@ if.then:
if.else:
call void @e()
- %call2 = call zeroext i1 @a()
- br i1 %call2, label %header2, label %header, !prof !3
+ br i1 %flag, label %header2, label %header, !prof !3
end:
call void @f()
OpenPOWER on IntegriCloud