summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp10
-rw-r--r--llvm/test/CodeGen/X86/loop-blocks.ll35
2 files changed, 45 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 2adbe48dc7f..88457ba835a 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -966,6 +966,16 @@ void MachineBlockPlacement::buildChain(
MachineBasicBlock *
MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
+ // Placing the latch block before the header may introduce an extra branch
+ // that skips this block the first time the loop is executed, which we want
+ // to avoid when optimising for size.
+ // FIXME: in theory there is a case that does not introduce a new branch,
+ // i.e. when the layout predecessor does not fall through to the loop
+ // header. In practice this never happens though: there always seems to be a
+ // preheader that can fall through and that is also placed before the header.
+ if (F->getFunction()->optForSize())
+ return L.getHeader();
+
// Check that the header hasn't been fused with a preheader block due to
// crazy branches. If it has, we need to start with the header at the top to
// prevent pulling the preheader into the loop body.
diff --git a/llvm/test/CodeGen/X86/loop-blocks.ll b/llvm/test/CodeGen/X86/loop-blocks.ll
index 1a1d11e6cb3..fc6a357523f 100644
--- a/llvm/test/CodeGen/X86/loop-blocks.ll
+++ b/llvm/test/CodeGen/X86/loop-blocks.ll
@@ -228,6 +228,41 @@ done:
ret void
}
+; This is exactly the same function as slightly_more_involved, except that it
+; is optimised for size (attributes #0). In that case we do not want to see
+; the block reordering: the loop header must stay at the top of the loop.
+
+; CHECK-LABEL: slightly_more_involved_2:
+; CHECK-NOT: jmp .LBB5_1
+; CHECK: .LBB5_1:
+; CHECK-NEXT: callq body
+
+define void @slightly_more_involved_2() #0 { ; #0 is the attribute group below that includes optsize and minsize
+entry:
+ br label %loop ; unconditionally enters %loop
+
+loop: ; target of the entry branch and of both branches back from %bb and %block_a
+ call void @body() ; presumably the "callq body" the CHECK-NEXT line expects right after the loop label
+ %t0 = call i32 @get()
+ %t1 = icmp slt i32 %t0, 2
+ br i1 %t1, label %block_a, label %bb ; %t0 < 2 -> %block_a, otherwise %bb
+
+bb:
+ %t2 = call i32 @get()
+ %t3 = icmp slt i32 %t2, 99
+ br i1 %t3, label %exit, label %loop ; %t2 < 99 -> leave via %exit, otherwise branch back to %loop
+
+block_a:
+ call void @bar99()
+ br label %loop ; always branches back to %loop
+
+exit:
+ call void @exit()
+ ret void
+}
+
+attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
+
declare void @bar99() nounwind
declare void @bar100() nounwind
declare void @bar101() nounwind
OpenPOWER on IntegriCloud