path: root/llvm/lib/CodeGen
author    Hans Wennborg <hans@hanshq.net>    2016-09-07 17:52:14 +0000
committer Hans Wennborg <hans@hanshq.net>    2016-09-07 17:52:14 +0000
commit    75e25f6812da5e46a2a8f8dbbeae0f0f3df832d9 (patch)
tree      4aae7345b3f2ee264c337342b41f7d2e3d4c77fc /llvm/lib/CodeGen
parent    5ad1cbeecb07c02d5c9e9f27410815556d488452 (diff)
X86: Fold tail calls into conditional branches where possible (PR26302)
When branching to a block that immediately tail calls, it is possible to fold
the call directly into the branch if the call is direct and there is no stack
adjustment, saving one byte.

Example:

  define void @f(i32 %x, i32 %y) {
  entry:
    %p = icmp eq i32 %x, %y
    br i1 %p, label %bb1, label %bb2
  bb1:
    tail call void @foo()
    ret void
  bb2:
    tail call void @bar()
    ret void
  }

before:

  f:
          movl    4(%esp), %eax
          cmpl    8(%esp), %eax
          jne     .LBB0_2
          jmp     foo
  .LBB0_2:
          jmp     bar

after:

  f:
          movl    4(%esp), %eax
          cmpl    8(%esp), %eax
          jne     bar
  .LBB0_1:
          jmp     foo

I don't expect any significant size savings from this (on a Clang bootstrap
I saw 288 bytes), but it does make the code a little tighter.

This patch only does 32-bit, but 64-bit would work similarly.

Differential Revision: https://reviews.llvm.org/D24108

llvm-svn: 280832
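For readers who want to see where such IR comes from, here is a minimal source-level sketch (not part of the commit): a plain C++ translation of the example above, with hypothetical external callees foo and bar. Built for 32-bit x86 with size optimization, each branch arm ends in a call that can be emitted as a bare jmp, which is exactly the shape the fold looks for; whether the fold actually fires also depends on the optsize heuristic added by this patch.

  // Sketch only: source a front end could lower to the IR example above.
  // foo/bar/f mirror the names in the commit message.
  extern "C" void foo();
  extern "C" void bar();

  extern "C" void f(int x, int y) {
    if (x == y)
      foo();  // lowered to a tail call -> "jmp foo"
    else
      bar();  // lowered to a tail call -> candidate for "jne bar"
  }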
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--  llvm/lib/CodeGen/BranchFolding.cpp | 37
1 file changed, 37 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index c5d4df63605..06a82220875 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -49,6 +49,7 @@ STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
 STATISTIC(NumBranchOpts, "Number of branches optimized");
 STATISTIC(NumTailMerge , "Number of block tails merged");
 STATISTIC(NumHoist     , "Number of times common instructions are hoisted");
+STATISTIC(NumTailCalls, "Number of tail calls optimized");
 
 static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
                               cl::init(cl::BOU_UNSET), cl::Hidden);
@@ -1448,6 +1449,42 @@ ReoptimizeBlock:
     }
   }
 
+  if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
+      MF.getFunction()->optForSize()) {
+    // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
+    // direction, thereby defeating careful block placement and regressing
+    // performance. Therefore, only consider this for optsize functions.
+    MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
+    if (TII->isUnconditionalTailCall(TailCall)) {
+      MachineBasicBlock *Pred = *MBB->pred_begin();
+      MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+      SmallVector<MachineOperand, 4> PredCond;
+      bool PredAnalyzable =
+          !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
+      if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB) {
+        // The predecessor has a conditional branch to this block which consists
+        // of only a tail call. Try to fold the tail call into the conditional
+        // branch.
+        if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
+          // TODO: It would be nice if analyzeBranch() could provide a pointer
+          // to the branch instruction so replaceBranchWithTailCall() doesn't
+          // have to search for it.
+          TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
+          ++NumTailCalls;
+          Pred->removeSuccessor(MBB);
+          MadeChange = true;
+          return MadeChange;
+        }
+      }
+      // If the predecessor is falling through to this block, we could reverse
+      // the branch condition and fold the tail call into that. However, after
+      // that we might have to re-arrange the CFG to fall through to the other
+      // block and there is a high risk of regressing code size rather than
+      // improving it.
+    }
+  }
+
   // Analyze the branch in the current block.
   MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
   SmallVector<MachineOperand, 4> CurCond;
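To make the check-and-retarget step in the hunk above easier to follow outside of LLVM, here is a self-contained toy model in ordinary C++. ToyBlock and foldTailCallIntoCondBranch are invented names for illustration; this is not the pass's actual data structure or code.

  #include <iostream>
  #include <string>
  #include <vector>

  // A drastically simplified basic block: at most one conditional-branch
  // target and at most one unconditional tail-call target.
  struct ToyBlock {
    std::string Name;
    std::string CondBranchTarget; // "jne <name of target block>"
    std::string TailCallTarget;   // "jmp <callee>", direct, no stack adjust
    std::vector<ToyBlock *> Preds;
  };

  // Mirrors the new BranchFolding logic: if MBB consists of nothing but an
  // unconditional tail call and its single predecessor branches conditionally
  // to it, retarget the predecessor's conditional branch at the callee and
  // drop the CFG edge (the analogue of Pred->removeSuccessor(MBB)).
  static bool foldTailCallIntoCondBranch(ToyBlock &MBB) {
    if (MBB.TailCallTarget.empty() || MBB.Preds.size() != 1)
      return false;
    ToyBlock *Pred = MBB.Preds.front();
    if (Pred->CondBranchTarget != MBB.Name)
      return false;
    Pred->CondBranchTarget = MBB.TailCallTarget; // "jne bb2" -> "jne bar"
    MBB.Preds.clear();
    return true;
  }

  int main() {
    ToyBlock Entry{"entry", "bb2", "", {}};
    ToyBlock BB2{"bb2", "", "bar", {&Entry}};
    bool Changed = foldTailCallIntoCondBranch(BB2);
    std::cout << (Changed ? "folded: " : "not folded: ")
              << "entry's conditional branch now targets "
              << Entry.CondBranchTarget << "\n"; // prints "bar"
    return 0;
  }

In the real pass, the legality question is delegated to the target through the new TargetInstrInfo hooks (isUnconditionalTailCall, canMakeTailCallConditional, replaceBranchWithTailCall), so only targets that can encode a conditional direct tail call with no stack adjustment opt in.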