X86 Peephole: fold loads to the source register operand if possible.

Machine CSE and other optimizations can remove instructions so folding is possible at peephole while not possible at ISel. This patch is a rework of r160919 and was tested on clang self-host on my local machine. rdar://10554090 and rdar://11873276 llvm-svn: 161152
author: Manman Ren <mren@apple.com> 2012-08-02 00:56:42 +0000
committer: Manman Ren <mren@apple.com> 2012-08-02 00:56:42 +0000
commit: 5759d012309a3fedab83ad0db16934c5f708d00f (patch)
tree: 2e8ad671d543c3ae9adfd4b4f018587a62d4d3e9 /llvm/lib/CodeGen
parent: 4c4fe84b25a8c866d396d08f3000e7e56ccc0c7d (diff)
download: bcm5719-llvm-5759d012309a3fedab83ad0db16934c5f708d00f.tar.gz
bcm5719-llvm-5759d012309a3fedab83ad0db16934c5f708d00f.zip
1 files changed, 57 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 91c33c4af41..d9474bf2400 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -78,6 +78,7 @@ STATISTIC(NumReuse,      "Number of extension results reused");
 STATISTIC(NumBitcasts,   "Number of bitcasts eliminated");
 STATISTIC(NumCmps,       "Number of compares eliminated");
 STATISTIC(NumImmFold,    "Number of move immediate folded");
+STATISTIC(NumLoadFold,   "Number of loads folded");
 
 namespace {
   class PeepholeOptimizer : public MachineFunctionPass {
@@ -114,6 +115,7 @@ namespace {
     bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                        SmallSet<unsigned, 4> &ImmDefRegs,
                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+    bool isLoadFoldable(MachineInstr *MI, unsigned &FoldAsLoadDefReg);
   };
 }
 
@@ -384,6 +386,29 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
   return false;
 }
 
+/// isLoadFoldable - Check whether MI is a candidate for folding into a later
+/// instruction. We only fold loads to virtual registers and the virtual
+/// register defined has a single use.
+bool PeepholeOptimizer::isLoadFoldable(MachineInstr *MI,
+                                       unsigned &FoldAsLoadDefReg) {
+  if (MI->canFoldAsLoad()) {
+    const MCInstrDesc &MCID = MI->getDesc();
+    if (MCID.getNumDefs() == 1) {
+      unsigned Reg = MI->getOperand(0).getReg();
+      // To reduce compilation time, we check MRI->hasOneUse when inserting
+      // loads. It should be checked when processing uses of the load, since
+      // uses can be removed during peephole.
+      if (!MI->getOperand(0).getSubReg() &&
+          TargetRegisterInfo::isVirtualRegister(Reg) &&
+          MRI->hasOneUse(Reg)) {
+        FoldAsLoadDefReg = Reg;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
                                         SmallSet<unsigned, 4> &ImmDefRegs,
                                  DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
@@ -441,6 +466,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
   SmallPtrSet<MachineInstr*, 8> LocalMIs;
   SmallSet<unsigned, 4> ImmDefRegs;
   DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+  unsigned FoldAsLoadDefReg;
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
     MachineBasicBlock *MBB = &*I;
 
@@ -448,6 +474,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
     LocalMIs.clear();
     ImmDefRegs.clear();
     ImmDefMIs.clear();
+    FoldAsLoadDefReg = 0;
 
     bool First = true;
     MachineBasicBlock::iterator PMII;
@@ -456,12 +483,17 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
       MachineInstr *MI = &*MII;
       LocalMIs.insert(MI);
 
+      // If there exists an instruction which belongs to the following
+      // categories, we will discard the load candidate.
       if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
           MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
           MI->hasUnmodeledSideEffects()) {
+        FoldAsLoadDefReg = 0;
         ++MII;
         continue;
       }
+      if (MI->mayStore() || MI->isCall())
+        FoldAsLoadDefReg = 0;
 
       if (MI->isBitcast()) {
         if (optimizeBitcastInstr(MI, MBB)) {
@@ -489,6 +521,31 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
           Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
       }
 
+      // Check whether MI is a load candidate for folding into a later
+      // instruction. If MI is not a candidate, check whether we can fold an
+      // earlier load into MI.
+      if (!isLoadFoldable(MI, FoldAsLoadDefReg) && FoldAsLoadDefReg) {
+        // We need to fold load after optimizeCmpInstr, since optimizeCmpInstr
+        // can enable folding by converting SUB to CMP.
+        MachineInstr *DefMI = 0;
+        MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
+                                                      FoldAsLoadDefReg, DefMI);
+        if (FoldMI) {
+          // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI.
+          LocalMIs.erase(MI);
+          LocalMIs.erase(DefMI);
+          LocalMIs.insert(FoldMI);
+          MI->eraseFromParent();
+          DefMI->eraseFromParent();
+          ++NumLoadFold;
+
+          // MI is replaced with FoldMI.
+          Changed = true;
+          PMII = FoldMI;
+          MII = llvm::next(PMII);
+          continue;
+        }
+      }
       First = false;
       PMII = MII;
       ++MII;
author	Manman Ren <mren@apple.com>	2012-08-02 00:56:42 +0000
committer	Manman Ren <mren@apple.com>	2012-08-02 00:56:42 +0000
commit	5759d012309a3fedab83ad0db16934c5f708d00f (patch)
tree	2e8ad671d543c3ae9adfd4b4f018587a62d4d3e9 /llvm/lib/CodeGen
parent	4c4fe84b25a8c866d396d08f3000e7e56ccc0c7d (diff)
download	bcm5719-llvm-5759d012309a3fedab83ad0db16934c5f708d00f.tar.gz bcm5719-llvm-5759d012309a3fedab83ad0db16934c5f708d00f.zip