summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/BranchFolding.cpp143
-rw-r--r--llvm/test/CodeGen/X86/branchfolding-debug-invariant.mir135
2 files changed, 183 insertions, 95 deletions
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 637c3001435..fbf87a52e53 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -302,114 +302,56 @@ static unsigned HashEndOfMBB(const MachineBasicBlock &MBB) {
return HashMachineInstr(*I);
}
-/// Whether MI should be counted as an instruction when calculating common tail.
+/// Whether MI should be counted as an instruction when calculating common tail.
static bool countsAsInstruction(const MachineInstr &MI) {
return !(MI.isDebugInstr() || MI.isCFIInstruction());
}
-/// ComputeCommonTailLength - Given two machine basic blocks, compute the number
-/// of instructions they actually have in common together at their end. Return
-/// iterators for the first shared instruction in each block.
+/// Iterate backwards from the given iterator \p I, towards the beginning of the
+/// block. If a MI satisfying 'countsAsInstruction' is found, return an iterator
+/// pointing to that MI. If no such MI is found, return the end iterator.
+static MachineBasicBlock::iterator
+skipBackwardPastNonInstructions(MachineBasicBlock::iterator I,
+ MachineBasicBlock *MBB) {
+ while (I != MBB->begin()) {
+ --I;
+ if (countsAsInstruction(*I))
+ return I;
+ }
+ return MBB->end();
+}
+
+/// Given two machine basic blocks, return the number of instructions they
+/// actually have in common together at their end. If a common tail is found (at
+/// least by one instruction), then iterators for the first shared instruction
+/// in each block are returned as well.
+///
+/// Non-instructions according to countsAsInstruction are ignored.
static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2,
MachineBasicBlock::iterator &I1,
MachineBasicBlock::iterator &I2) {
- I1 = MBB1->end();
- I2 = MBB2->end();
+ MachineBasicBlock::iterator MBBI1 = MBB1->end();
+ MachineBasicBlock::iterator MBBI2 = MBB2->end();
unsigned TailLen = 0;
- while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
- --I1; --I2;
- // Skip debugging pseudos; necessary to avoid changing the code.
- while (!countsAsInstruction(*I1)) {
- if (I1==MBB1->begin()) {
- while (!countsAsInstruction(*I2)) {
- if (I2==MBB2->begin()) {
- // I1==DBG at begin; I2==DBG at begin
- goto SkipTopCFIAndReturn;
- }
- --I2;
- }
- ++I2;
- // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin
- goto SkipTopCFIAndReturn;
- }
- --I1;
- }
- // I1==first (untested) non-DBG preceding known match
- while (!countsAsInstruction(*I2)) {
- if (I2==MBB2->begin()) {
- ++I1;
- // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin
- goto SkipTopCFIAndReturn;
- }
- --I2;
- }
- // I1, I2==first (untested) non-DBGs preceding known match
- if (!I1->isIdenticalTo(*I2) ||
+ while (true) {
+ MBBI1 = skipBackwardPastNonInstructions(MBBI1, MBB1);
+ MBBI2 = skipBackwardPastNonInstructions(MBBI2, MBB2);
+ if (MBBI1 == MBB1->end() || MBBI2 == MBB2->end())
+ break;
+ if (!MBBI1->isIdenticalTo(*MBBI2) ||
// FIXME: This check is dubious. It's used to get around a problem where
// people incorrectly expect inline asm directives to remain in the same
// relative order. This is untenable because normal compiler
// optimizations (like this one) may reorder and/or merge these
// directives.
- I1->isInlineAsm()) {
- ++I1; ++I2;
+ MBBI1->isInlineAsm()) {
break;
}
++TailLen;
- }
- // Back past possible debugging pseudos at beginning of block. This matters
- // when one block differs from the other only by whether debugging pseudos
- // are present at the beginning. (This way, the various checks later for
- // I1==MBB1->begin() work as expected.)
- if (I1 == MBB1->begin() && I2 != MBB2->begin()) {
- --I2;
- while (I2->isDebugInstr()) {
- if (I2 == MBB2->begin())
- return TailLen;
- --I2;
- }
- ++I2;
- }
- if (I2 == MBB2->begin() && I1 != MBB1->begin()) {
- --I1;
- while (I1->isDebugInstr()) {
- if (I1 == MBB1->begin())
- return TailLen;
- --I1;
- }
- ++I1;
- }
-
-SkipTopCFIAndReturn:
- // Ensure that I1 and I2 do not point to a CFI_INSTRUCTION. This can happen if
- // I1 and I2 are non-identical when compared and then one or both of them ends
- // up pointing to a CFI instruction after being incremented. For example:
- /*
- BB1:
- ...
- INSTRUCTION_A
- ADD32ri8 <- last common instruction
- ...
- BB2:
- ...
- INSTRUCTION_B
- CFI_INSTRUCTION
- ADD32ri8 <- last common instruction
- ...
- */
- // When INSTRUCTION_A and INSTRUCTION_B are compared as not equal, after
- // incrementing the iterators, I1 will point to ADD, however I2 will point to
- // the CFI instruction. Later on, this leads to BB2 being 'hacked off' at the
- // wrong place (in ReplaceTailWithBranchTo()) which results in losing this CFI
- // instruction.
- // Skip CFI_INSTRUCTION and debugging instruction; necessary to avoid changing the code.
- while (I1 != MBB1->end() && !countsAsInstruction(*I1)) {
- ++I1;
- }
-
- while (I2 != MBB2->end() && !countsAsInstruction(*I2)) {
- ++I2;
+ I1 = MBBI1;
+ I2 = MBBI2;
}
return TailLen;
@@ -661,6 +603,17 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
<< " and " << printMBBReference(*MBB2) << " is "
<< CommonTailLen << '\n');
+ // Move the iterators to the beginning of the MBB if we only got debug
+ // instructions before the tail. This is to avoid splitting a block when we
+ // only got debug instructions before the tail (to be invariant on -g).
+ if (skipDebugInstructionsForward(MBB1->begin(), MBB1->end()) == I1)
+ I1 = MBB1->begin();
+ if (skipDebugInstructionsForward(MBB2->begin(), MBB2->end()) == I2)
+ I2 = MBB2->begin();
+
+ bool FullBlockTail1 = I1 == MBB1->begin();
+ bool FullBlockTail2 = I2 == MBB2->begin();
+
// It's almost always profitable to merge any number of non-terminator
// instructions with the block that falls through into the common successor.
// This is true only for a single successor. For multiple successors, we are
@@ -679,7 +632,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// are unlikely to become a fallthrough target after machine block placement.
// Tail merging these blocks is unlikely to create additional unconditional
// branches, and will reduce the size of this cold code.
- if (I1 == MBB1->begin() && I2 == MBB2->begin() &&
+ if (FullBlockTail1 && FullBlockTail2 &&
blockEndsInUnreachable(MBB1) && blockEndsInUnreachable(MBB2))
return true;
@@ -687,16 +640,16 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// a position where the other could fall through into it, merge any number
// of instructions, because it can be done without a branch.
// TODO: If the blocks are not adjacent, move one of them so that they are?
- if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+ if (MBB1->isLayoutSuccessor(MBB2) && FullBlockTail2)
return true;
- if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+ if (MBB2->isLayoutSuccessor(MBB1) && FullBlockTail1)
return true;
// If both blocks are identical and end in a branch, merge them unless they
// both have a fallthrough predecessor and successor.
// We can only do this after block placement because it depends on whether
// there are fallthroughs, and we don't know until after layout.
- if (AfterPlacement && I1 == MBB1->begin() && I2 == MBB2->begin()) {
+ if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {
auto BothFallThrough = [](MachineBasicBlock *MBB) {
if (MBB->succ_size() != 0 && !MBB->canFallThrough())
return false;
@@ -730,7 +683,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
- (I1 == MBB1->begin() || I2 == MBB2->begin());
+ (FullBlockTail1 || FullBlockTail2);
}
unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
diff --git a/llvm/test/CodeGen/X86/branchfolding-debug-invariant.mir b/llvm/test/CodeGen/X86/branchfolding-debug-invariant.mir
new file mode 100644
index 00000000000..95c96d06393
--- /dev/null
+++ b/llvm/test/CodeGen/X86/branchfolding-debug-invariant.mir
@@ -0,0 +1,135 @@
+# RUN: llc -mtriple=x86_64-- -run-pass branch-folder -O3 -o - %s | FileCheck %s
+
+---
+name: test1a
+body: |
+ ; CHECK-LABEL: name: test1a
+ ; CHECK: bb.0:
+ ; CHECK: TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+ ; CHECK: JCC_1 %bb.2, 5, implicit $eflags
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+ ; CHECK-NOT: RET
+ ; CHECK: bb.2:
+ ; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ ; CHECK: RET 0
+ bb.0:
+ TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+ JCC_1 %bb.2, 5, implicit killed $eflags
+
+ bb.1:
+ MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+ MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ RET 0
+
+ bb.2:
+ MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ RET 0
+...
+
+---
+name: test1b
+body: |
+
+ ; Verify that we get the same rewrites as in test1a when adding some
+ ; DBG_VALUE instructions in the mix.
+ ;
+ ; CHECK-LABEL: name: test1b
+ ; CHECK: bb.0:
+ ; CHECK: TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+ ; CHECK: JCC_1 %bb.2, 5, implicit $eflags
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+ ; CHECK-NOT: RET
+ ; CHECK: bb.2:
+ ; CHECK: DBG_VALUE
+ ; CHECK: DBG_VALUE
+ ; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ ; CHECK: RET 0
+ bb.0:
+ TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+ JCC_1 %bb.2, 5, implicit killed $eflags
+
+ bb.1:
+ MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+ MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ RET 0
+
+ bb.2:
+ DBG_VALUE
+ DBG_VALUE
+ MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ RET 0
+...
+
+---
+name: test2a
+body: |
+ ; CFI instruction currently prevents the rewrite here (although technically
+ ; I suppose that branch folding could let bb.1 fallthrough into bb.2 here).
+ ;
+ ; CHECK-LABEL: name: test2a
+ ; CHECK: bb.0:
+ ; CHECK: TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+ ; CHECK: JCC_1 %bb.2, 5, implicit killed $eflags
+ ; CHECK: bb.1:
+ ; CHECK: MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+ ; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ ; CHECK: RET 0
+ ; CHECK: bb.2:
+ ; CHECK: CFI_INSTRUCTION def_cfa_offset 8
+ ; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ ; CHECK: RET 0
+ bb.0:
+ TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+ JCC_1 %bb.2, 5, implicit killed $eflags
+
+ bb.1:
+ MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+ MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ RET 0
+
+ bb.2:
+ CFI_INSTRUCTION def_cfa_offset 8
+ MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ RET 0
+...
+
+---
+name: test2b
+body: |
+ ; Verify that we get the same rewrites as in test1a when adding some
+ ; DBG_VALUE instructions in the mix.
+ ;
+ ; CHECK-LABEL: name: test2b
+ ; CHECK: bb.0:
+ ; CHECK: TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+ ; CHECK: JCC_1 %bb.2, 5, implicit killed $eflags
+ ; CHECK: bb.1:
+ ; CHECK: MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+ ; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ ; CHECK: RET 0
+ ; CHECK: bb.2:
+ ; CHECK: DBG_VALUE
+ ; CHECK: CFI_INSTRUCTION def_cfa_offset 8
+ ; CHECK: DBG_VALUE
+ ; CHECK: MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ ; CHECK: RET 0
+ bb.0:
+ TEST8rr killed renamable $al, renamable $al, implicit-def $eflags
+ JCC_1 %bb.2, 5, implicit killed $eflags
+
+ bb.1:
+ MOV8mi $r12, 1, $noreg, 0, $noreg, 0
+ MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ RET 0
+
+ bb.2:
+ DBG_VALUE
+ CFI_INSTRUCTION def_cfa_offset 8
+ DBG_VALUE
+ MOV8mi $r13, 1, $noreg, 0, $noreg, 0
+ RET 0
+...
OpenPOWER on IntegriCloud