diff options
| author | Evan Cheng <evan.cheng@apple.com> | 2009-09-21 21:12:25 +0000 | 
|---|---|---|
| committer | Evan Cheng <evan.cheng@apple.com> | 2009-09-21 21:12:25 +0000 | 
| commit | 255f4164701b7bae414d364746d2457b98c1949f (patch) | |
| tree | 7049dfe936a8c7c03bd1feb7f95c72024068aa11 /llvm | |
| parent | 509027336700d3ebafae951f915360ac567b63dd (diff) | |
| download | bcm5719-llvm-255f4164701b7bae414d364746d2457b98c1949f.tar.gz bcm5719-llvm-255f4164701b7bae414d364746d2457b98c1949f.zip  | |
Clean up spill weight computation. Also some changes to give loop induction
variable increment / decrement slightly higher priority. 
This has major impact on some micro-benchmarks. On MultiSource/Applications
and spec tests, it's a minor win. It also reduces 256.bzip2 instruction count
by 8%, 55 on 164.gzip on i386 / Darwin.
llvm-svn: 82485
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp | 148 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SimpleRegisterCoalescing.h | 13 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/remat.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2008-07-11-SpillerBug.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll | 36 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/stack-color-with-reg.ll | 4 | 
9 files changed, 145 insertions, 68 deletions
diff --git a/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp b/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp index bbb742678fb..068dcda93b5 100644 --- a/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -2535,7 +2535,8 @@ void SimpleRegisterCoalescing::releaseMemory() {    ReMatDefs.clear();  } -bool SimpleRegisterCoalescing::isZeroLengthInterval(LiveInterval *li) const { +/// Returns true if the given live interval is zero length. +static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) {    for (LiveInterval::Ranges::const_iterator           i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)      if (li_->getPrevIndex(i->end) > i->start) @@ -2543,6 +2544,97 @@ bool SimpleRegisterCoalescing::isZeroLengthInterval(LiveInterval *li) const {    return true;  } +void SimpleRegisterCoalescing::CalculateSpillWeights() { +  SmallSet<unsigned, 4> Processed; +  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); +       mbbi != mbbe; ++mbbi) { +    MachineBasicBlock* MBB = mbbi; +    MachineInstrIndex MBBEnd = li_->getMBBEndIdx(MBB); +    MachineLoop* loop = loopInfo->getLoopFor(MBB); +    unsigned loopDepth = loop ? loop->getLoopDepth() : 0; +    bool isExit = loop ? loop->isLoopExit(MBB) : false; + +    for (MachineBasicBlock::iterator mii = MBB->begin(), mie = MBB->end(); +         mii != mie; ++mii) { +      MachineInstr *MI = mii; + +      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { +        const MachineOperand &mopi = MI->getOperand(i); +        if (!mopi.isReg() || mopi.getReg() == 0) +          continue; +        unsigned Reg = mopi.getReg(); +        if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) +          continue; +        // Multiple uses of reg by the same instruction. It should not +        // contribute to spill weight again. 
+        if (!Processed.insert(Reg)) +          continue; + +        bool HasDef = mopi.isDef(); +        bool HasUse = mopi.isUse(); +        for (unsigned j = i+1; j != e; ++j) { +          const MachineOperand &mopj = MI->getOperand(j); +          if (!mopj.isReg() || mopj.getReg() != Reg) +            continue; +          HasDef |= mopj.isDef(); +          HasUse |= mopj.isUse(); +        } + +        LiveInterval &RegInt = li_->getInterval(Reg); +        float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth+1); +        if (HasDef && isExit) { +          // Looks like this is a loop count variable update. +          MachineInstrIndex DefIdx = +            li_->getDefIndex(li_->getInstructionIndex(MI)); +          const LiveRange *DLR = +            li_->getInterval(Reg).getLiveRangeContaining(DefIdx); +          if (DLR->end > MBBEnd) +            Weight *= 3.0F; +        } +        RegInt.weight += Weight; +      } +      Processed.clear(); +    } +  } + +  for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { +    LiveInterval &LI = *I->second; +    if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { +      // If the live interval length is essentially zero, i.e. in every live +      // range the use follows def immediately, it doesn't make sense to spill +      // it and hope it will be easier to allocate for this li. +      if (isZeroLengthInterval(&LI, li_)) { +        LI.weight = HUGE_VALF; +        continue; +      } + +      bool isLoad = false; +      SmallVector<LiveInterval*, 4> SpillIs; +      if (li_->isReMaterializable(LI, SpillIs, isLoad)) { +        // If all of the definitions of the interval are re-materializable, +        // it is a preferred candidate for spilling. If non of the defs are +        // loads, then it's potentially very cheap to re-materialize. +        // FIXME: this gets much more complicated once we support non-trivial +        // re-materialization. 
+        if (isLoad) +          LI.weight *= 0.9F; +        else +          LI.weight *= 0.5F; +      } + +      // Slightly prefer live interval that has been assigned a preferred reg. +      std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg); +      if (Hint.first || Hint.second) +        LI.weight *= 1.01F; + +      // Divide the weight of the interval by its size.  This encourages +      // spilling of intervals that are large and have few uses, and +      // discourages spilling of small intervals with many uses. +      LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; +    } +  } +} +  bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {    mf_ = &fn; @@ -2581,8 +2673,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {    for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();         mbbi != mbbe; ++mbbi) {      MachineBasicBlock* mbb = mbbi; -    unsigned loopDepth = loopInfo->getLoopDepth(mbb); -      for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();           mii != mie; ) {        MachineInstr *MI = mii; @@ -2656,62 +2746,12 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {          mii = mbbi->erase(mii);          ++numPeep;        } else { -        SmallSet<unsigned, 4> UniqueUses; -        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { -          const MachineOperand &mop = MI->getOperand(i); -          if (mop.isReg() && mop.getReg() && -              TargetRegisterInfo::isVirtualRegister(mop.getReg())) { -            unsigned reg = mop.getReg(); -            // Multiple uses of reg by the same instruction. It should not -            // contribute to spill weight again. 
-            if (UniqueUses.count(reg) != 0) -              continue; -            LiveInterval &RegInt = li_->getInterval(reg); -            RegInt.weight += -              li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth); -            UniqueUses.insert(reg); -          } -        }          ++mii;        }      }    } -  for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { -    LiveInterval &LI = *I->second; -    if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { -      // If the live interval length is essentially zero, i.e. in every live -      // range the use follows def immediately, it doesn't make sense to spill -      // it and hope it will be easier to allocate for this li. -      if (isZeroLengthInterval(&LI)) -        LI.weight = HUGE_VALF; -      else { -        bool isLoad = false; -        SmallVector<LiveInterval*, 4> SpillIs; -        if (li_->isReMaterializable(LI, SpillIs, isLoad)) { -          // If all of the definitions of the interval are re-materializable, -          // it is a preferred candidate for spilling. If non of the defs are -          // loads, then it's potentially very cheap to re-materialize. -          // FIXME: this gets much more complicated once we support non-trivial -          // re-materialization. -          if (isLoad) -            LI.weight *= 0.9F; -          else -            LI.weight *= 0.5F; -        } -      } - -      // Slightly prefer live interval that has been assigned a preferred reg. -      std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg); -      if (Hint.first || Hint.second) -        LI.weight *= 1.01F; - -      // Divide the weight of the interval by its size.  This encourages -      // spilling of intervals that are large and have few uses, and -      // discourages spilling of small intervals with many uses. 
-      LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; -    } -  } +  CalculateSpillWeights();    DEBUG(dump());    return true; diff --git a/llvm/lib/CodeGen/SimpleRegisterCoalescing.h b/llvm/lib/CodeGen/SimpleRegisterCoalescing.h index 7364767ab0c..20b8eb2274c 100644 --- a/llvm/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/llvm/lib/CodeGen/SimpleRegisterCoalescing.h @@ -123,7 +123,6 @@ namespace llvm {      /// classes.  The registers may be either phys or virt regs.      bool differingRegisterClasses(unsigned RegA, unsigned RegB) const; -      /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If      /// the source value number is defined by a copy from the destination reg      /// see if we can merge these two destination reg valno# into a single @@ -235,13 +234,15 @@ namespace llvm {      /// lastRegisterUse - Returns the last use of the specific register between      /// cycles Start and End or NULL if there are no uses. -    MachineOperand *lastRegisterUse(MachineInstrIndex Start, MachineInstrIndex End, -                                    unsigned Reg, MachineInstrIndex &LastUseIdx) const; +    MachineOperand *lastRegisterUse(MachineInstrIndex Start, +                                    MachineInstrIndex End, unsigned Reg, +                                    MachineInstrIndex &LastUseIdx) const; -    void printRegName(unsigned reg) const; +    /// CalculateSpillWeights - Compute spill weights for all virtual register +    /// live intervals. +    void CalculateSpillWeights(); -    /// Returns true if the given live interval is zero length. 
-    bool isZeroLengthInterval(LiveInterval *li) const; +    void printRegName(unsigned reg) const;    };  } // End llvm namespace diff --git a/llvm/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll b/llvm/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll index 311c8557426..221a168cba8 100644 --- a/llvm/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll +++ b/llvm/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 164 +; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 161  	%"struct.Adv5::Ekin<3>" = type <{ i8 }>  	%"struct.Adv5::X::Energyflux<3>" = type { double } diff --git a/llvm/test/CodeGen/ARM/remat.ll b/llvm/test/CodeGen/ARM/remat.ll index 21d117aaf28..50da997ed46 100644 --- a/llvm/test/CodeGen/ARM/remat.ll +++ b/llvm/test/CodeGen/ARM/remat.ll @@ -1,5 +1,5 @@  ; RUN: llc < %s -mtriple=arm-apple-darwin  -; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 2 +; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 5  	%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }  	%struct.LOCBOX = type { i32, i32, i32, i32 } diff --git a/llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll index 95df19ad1f8..a91ac27f98d 100644 --- a/llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll +++ b/llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 3 -; RUN: llc < %s -march=x86 -stats |& grep {Number of dead spill slots removed} +; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 2  ; rdar://5761454  	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } diff --git 
a/llvm/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/llvm/test/CodeGen/X86/2008-07-11-SpillerBug.ll index 6f3019e78c4..f75e605168e 100644 --- a/llvm/test/CodeGen/X86/2008-07-11-SpillerBug.ll +++ b/llvm/test/CodeGen/X86/2008-07-11-SpillerBug.ll @@ -2,8 +2,9 @@  ; PR2536 -; CHECK: movw %ax +; CHECK: movw %cx  ; CHECK-NEXT: andl    $65534, % +; CHECK-NEXT: movl %  ; CHECK-NEXT: movl $17  @g_5 = external global i16		; <i16*> [#uses=2] diff --git a/llvm/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/llvm/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll index 9f11c4e58ad..4d25b0f9831 100644 --- a/llvm/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll +++ b/llvm/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of registers downgraded} +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 84  ; rdar://6802189  ; Test if linearscan is unfavoring registers for allocation to allow more reuse diff --git a/llvm/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll b/llvm/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll new file mode 100644 index 00000000000..80b883582ce --- /dev/null +++ b/llvm/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -relocation-model=pic | FileCheck %s + +define void @dot(i16* nocapture %A, i32 %As, i16* nocapture %B, i32 %Bs, i16* nocapture %C, i32 %N) nounwind ssp { +; CHECK: dot: +; CHECK: decl % +; CHECK-NEXT: jne +entry: +	%0 = icmp sgt i32 %N, 0		; <i1> [#uses=1] +	br i1 %0, label %bb, label %bb2 + +bb:		; preds = %bb, %entry +	%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=3] +	%sum.04 = phi i32 [ 0, %entry ], [ %10, %bb ]		; <i32> [#uses=1] +	%1 = mul i32 %i.03, %As		; <i32> [#uses=1] +	%2 = getelementptr i16* %A, i32 %1		; <i16*> [#uses=1] +	%3 = load i16* %2, align 2		; <i16> [#uses=1] 
+	%4 = sext i16 %3 to i32		; <i32> [#uses=1] +	%5 = mul i32 %i.03, %Bs		; <i32> [#uses=1] +	%6 = getelementptr i16* %B, i32 %5		; <i16*> [#uses=1] +	%7 = load i16* %6, align 2		; <i16> [#uses=1] +	%8 = sext i16 %7 to i32		; <i32> [#uses=1] +	%9 = mul i32 %8, %4		; <i32> [#uses=1] +	%10 = add i32 %9, %sum.04		; <i32> [#uses=2] +	%indvar.next = add i32 %i.03, 1		; <i32> [#uses=2] +	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1] +	br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb + +bb1.bb2_crit_edge:		; preds = %bb +	%phitmp = trunc i32 %10 to i16		; <i16> [#uses=1] +	br label %bb2 + +bb2:		; preds = %entry, %bb1.bb2_crit_edge +	%sum.0.lcssa = phi i16 [ %phitmp, %bb1.bb2_crit_edge ], [ 0, %entry ]		; <i16> [#uses=1] +	store i16 %sum.0.lcssa, i16* %C, align 2 +	ret void +} diff --git a/llvm/test/CodeGen/X86/stack-color-with-reg.ll b/llvm/test/CodeGen/X86/stack-color-with-reg.ll index a8ae8e8168e..672f77eef02 100644 --- a/llvm/test/CodeGen/X86/stack-color-with-reg.ll +++ b/llvm/test/CodeGen/X86/stack-color-with-reg.ll @@ -1,6 +1,6 @@  ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t -; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 8 -; RUN:   grep asm-printer %t   | grep 182 +; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 5 +; RUN:   grep asm-printer %t   | grep 179  	type { [62 x %struct.Bitvec*] }		; type %0  	type { i8* }		; type %1  | 

