diff options
| author | Evan Cheng <evan.cheng@apple.com> | 2009-09-21 21:12:25 +0000 | 
|---|---|---|
| committer | Evan Cheng <evan.cheng@apple.com> | 2009-09-21 21:12:25 +0000 | 
| commit | 255f4164701b7bae414d364746d2457b98c1949f (patch) | |
| tree | 7049dfe936a8c7c03bd1feb7f95c72024068aa11 /llvm | |
| parent | 509027336700d3ebafae951f915360ac567b63dd (diff) | |
| download | bcm5719-llvm-255f4164701b7bae414d364746d2457b98c1949f.tar.gz bcm5719-llvm-255f4164701b7bae414d364746d2457b98c1949f.zip  | |
Clean up spill weight computation. Also some changes to give loop induction
variable increment / decrement slightly higher priority. 
This has major impact on some micro-benchmarks. On MultiSource/Applications
and spec tests, it's a minor win. It also reduces 256.bzip2 instruction count
by 8%, 55 on 164.gzip on i386 / Darwin.
llvm-svn: 82485
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp | 148 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SimpleRegisterCoalescing.h | 13 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/remat.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2008-07-11-SpillerBug.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll | 36 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/stack-color-with-reg.ll | 4 | 
9 files changed, 145 insertions, 68 deletions
diff --git a/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp b/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp index bbb742678fb..068dcda93b5 100644 --- a/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -2535,7 +2535,8 @@ void SimpleRegisterCoalescing::releaseMemory() {    ReMatDefs.clear();  } -bool SimpleRegisterCoalescing::isZeroLengthInterval(LiveInterval *li) const { +/// Returns true if the given live interval is zero length. +static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) {    for (LiveInterval::Ranges::const_iterator           i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)      if (li_->getPrevIndex(i->end) > i->start) @@ -2543,6 +2544,97 @@ bool SimpleRegisterCoalescing::isZeroLengthInterval(LiveInterval *li) const {    return true;  } +void SimpleRegisterCoalescing::CalculateSpillWeights() { +  SmallSet<unsigned, 4> Processed; +  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); +       mbbi != mbbe; ++mbbi) { +    MachineBasicBlock* MBB = mbbi; +    MachineInstrIndex MBBEnd = li_->getMBBEndIdx(MBB); +    MachineLoop* loop = loopInfo->getLoopFor(MBB); +    unsigned loopDepth = loop ? loop->getLoopDepth() : 0; +    bool isExit = loop ? loop->isLoopExit(MBB) : false; + +    for (MachineBasicBlock::iterator mii = MBB->begin(), mie = MBB->end(); +         mii != mie; ++mii) { +      MachineInstr *MI = mii; + +      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { +        const MachineOperand &mopi = MI->getOperand(i); +        if (!mopi.isReg() || mopi.getReg() == 0) +          continue; +        unsigned Reg = mopi.getReg(); +        if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg())) +          continue; +        // Multiple uses of reg by the same instruction. It should not +        // contribute to spill weight again. 
+        if (!Processed.insert(Reg)) +          continue; + +        bool HasDef = mopi.isDef(); +        bool HasUse = mopi.isUse(); +        for (unsigned j = i+1; j != e; ++j) { +          const MachineOperand &mopj = MI->getOperand(j); +          if (!mopj.isReg() || mopj.getReg() != Reg) +            continue; +          HasDef |= mopj.isDef(); +          HasUse |= mopj.isUse(); +        } + +        LiveInterval &RegInt = li_->getInterval(Reg); +        float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth+1); +        if (HasDef && isExit) { +          // Looks like this is a loop count variable update. +          MachineInstrIndex DefIdx = +            li_->getDefIndex(li_->getInstructionIndex(MI)); +          const LiveRange *DLR = +            li_->getInterval(Reg).getLiveRangeContaining(DefIdx); +          if (DLR->end > MBBEnd) +            Weight *= 3.0F; +        } +        RegInt.weight += Weight; +      } +      Processed.clear(); +    } +  } + +  for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { +    LiveInterval &LI = *I->second; +    if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { +      // If the live interval length is essentially zero, i.e. in every live +      // range the use follows def immediately, it doesn't make sense to spill +      // it and hope it will be easier to allocate for this li. +      if (isZeroLengthInterval(&LI, li_)) { +        LI.weight = HUGE_VALF; +        continue; +      } + +      bool isLoad = false; +      SmallVector<LiveInterval*, 4> SpillIs; +      if (li_->isReMaterializable(LI, SpillIs, isLoad)) { +        // If all of the definitions of the interval are re-materializable, +        // it is a preferred candidate for spilling. If non of the defs are +        // loads, then it's potentially very cheap to re-materialize. +        // FIXME: this gets much more complicated once we support non-trivial +        // re-materialization. 
+        if (isLoad) +          LI.weight *= 0.9F; +        else +          LI.weight *= 0.5F; +      } + +      // Slightly prefer live interval that has been assigned a preferred reg. +      std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg); +      if (Hint.first || Hint.second) +        LI.weight *= 1.01F; + +      // Divide the weight of the interval by its size.  This encourages +      // spilling of intervals that are large and have few uses, and +      // discourages spilling of small intervals with many uses. +      LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; +    } +  } +} +  bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {    mf_ = &fn; @@ -2581,8 +2673,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {    for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();         mbbi != mbbe; ++mbbi) {      MachineBasicBlock* mbb = mbbi; -    unsigned loopDepth = loopInfo->getLoopDepth(mbb); -      for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();           mii != mie; ) {        MachineInstr *MI = mii; @@ -2656,62 +2746,12 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {          mii = mbbi->erase(mii);          ++numPeep;        } else { -        SmallSet<unsigned, 4> UniqueUses; -        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { -          const MachineOperand &mop = MI->getOperand(i); -          if (mop.isReg() && mop.getReg() && -              TargetRegisterInfo::isVirtualRegister(mop.getReg())) { -            unsigned reg = mop.getReg(); -            // Multiple uses of reg by the same instruction. It should not -            // contribute to spill weight again. 
-            if (UniqueUses.count(reg) != 0) -              continue; -            LiveInterval &RegInt = li_->getInterval(reg); -            RegInt.weight += -              li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth); -            UniqueUses.insert(reg); -          } -        }          ++mii;        }      }    } -  for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) { -    LiveInterval &LI = *I->second; -    if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { -      // If the live interval length is essentially zero, i.e. in every live -      // range the use follows def immediately, it doesn't make sense to spill -      // it and hope it will be easier to allocate for this li. -      if (isZeroLengthInterval(&LI)) -        LI.weight = HUGE_VALF; -      else { -        bool isLoad = false; -        SmallVector<LiveInterval*, 4> SpillIs; -        if (li_->isReMaterializable(LI, SpillIs, isLoad)) { -          // If all of the definitions of the interval are re-materializable, -          // it is a preferred candidate for spilling. If non of the defs are -          // loads, then it's potentially very cheap to re-materialize. -          // FIXME: this gets much more complicated once we support non-trivial -          // re-materialization. -          if (isLoad) -            LI.weight *= 0.9F; -          else -            LI.weight *= 0.5F; -        } -      } - -      // Slightly prefer live interval that has been assigned a preferred reg. -      std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg); -      if (Hint.first || Hint.second) -        LI.weight *= 1.01F; - -      // Divide the weight of the interval by its size.  This encourages -      // spilling of intervals that are large and have few uses, and -      // discourages spilling of small intervals with many uses. 
-      LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM; -    } -  } +  CalculateSpillWeights();    DEBUG(dump());    return true; diff --git a/llvm/lib/CodeGen/SimpleRegisterCoalescing.h b/llvm/lib/CodeGen/SimpleRegisterCoalescing.h index 7364767ab0c..20b8eb2274c 100644 --- a/llvm/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/llvm/lib/CodeGen/SimpleRegisterCoalescing.h @@ -123,7 +123,6 @@ namespace llvm {      /// classes.  The registers may be either phys or virt regs.      bool differingRegisterClasses(unsigned RegA, unsigned RegB) const; -      /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If      /// the source value number is defined by a copy from the destination reg      /// see if we can merge these two destination reg valno# into a single @@ -235,13 +234,15 @@ namespace llvm {      /// lastRegisterUse - Returns the last use of the specific register between      /// cycles Start and End or NULL if there are no uses. -    MachineOperand *lastRegisterUse(MachineInstrIndex Start, MachineInstrIndex End, -                                    unsigned Reg, MachineInstrIndex &LastUseIdx) const; +    MachineOperand *lastRegisterUse(MachineInstrIndex Start, +                                    MachineInstrIndex End, unsigned Reg, +                                    MachineInstrIndex &LastUseIdx) const; -    void printRegName(unsigned reg) const; +    /// CalculateSpillWeights - Compute spill weights for all virtual register +    /// live intervals. +    void CalculateSpillWeights(); -    /// Returns true if the given live interval is zero length. 
-    bool isZeroLengthInterval(LiveInterval *li) const; +    void printRegName(unsigned reg) const;    };  } // End llvm namespace diff --git a/llvm/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll b/llvm/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll index 311c8557426..221a168cba8 100644 --- a/llvm/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll +++ b/llvm/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 164 +; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 161  	%"struct.Adv5::Ekin<3>" = type <{ i8 }>  	%"struct.Adv5::X::Energyflux<3>" = type { double } diff --git a/llvm/test/CodeGen/ARM/remat.ll b/llvm/test/CodeGen/ARM/remat.ll index 21d117aaf28..50da997ed46 100644 --- a/llvm/test/CodeGen/ARM/remat.ll +++ b/llvm/test/CodeGen/ARM/remat.ll @@ -1,5 +1,5 @@  ; RUN: llc < %s -mtriple=arm-apple-darwin  -; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 2 +; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 5  	%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }  	%struct.LOCBOX = type { i32, i32, i32, i32 } diff --git a/llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll index 95df19ad1f8..a91ac27f98d 100644 --- a/llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll +++ b/llvm/test/CodeGen/X86/2008-02-22-ReMatBug.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 3 -; RUN: llc < %s -march=x86 -stats |& grep {Number of dead spill slots removed} +; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 2  ; rdar://5761454  	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } diff --git 
a/llvm/test/CodeGen/X86/2008-07-11-SpillerBug.ll b/llvm/test/CodeGen/X86/2008-07-11-SpillerBug.ll index 6f3019e78c4..f75e605168e 100644 --- a/llvm/test/CodeGen/X86/2008-07-11-SpillerBug.ll +++ b/llvm/test/CodeGen/X86/2008-07-11-SpillerBug.ll @@ -2,8 +2,9 @@  ; PR2536 -; CHECK: movw %ax +; CHECK: movw %cx  ; CHECK-NEXT: andl    $65534, % +; CHECK-NEXT: movl %  ; CHECK-NEXT: movl $17  @g_5 = external global i16		; <i16*> [#uses=2] diff --git a/llvm/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/llvm/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll index 9f11c4e58ad..4d25b0f9831 100644 --- a/llvm/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll +++ b/llvm/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of registers downgraded} +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 84  ; rdar://6802189  ; Test if linearscan is unfavoring registers for allocation to allow more reuse diff --git a/llvm/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll b/llvm/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll new file mode 100644 index 00000000000..80b883582ce --- /dev/null +++ b/llvm/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -relocation-model=pic | FileCheck %s + +define void @dot(i16* nocapture %A, i32 %As, i16* nocapture %B, i32 %Bs, i16* nocapture %C, i32 %N) nounwind ssp { +; CHECK: dot: +; CHECK: decl % +; CHECK-NEXT: jne +entry: +	%0 = icmp sgt i32 %N, 0		; <i1> [#uses=1] +	br i1 %0, label %bb, label %bb2 + +bb:		; preds = %bb, %entry +	%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=3] +	%sum.04 = phi i32 [ 0, %entry ], [ %10, %bb ]		; <i32> [#uses=1] +	%1 = mul i32 %i.03, %As		; <i32> [#uses=1] +	%2 = getelementptr i16* %A, i32 %1		; <i16*> [#uses=1] +	%3 = load i16* %2, align 2		; <i16> [#uses=1] 
+	%4 = sext i16 %3 to i32		; <i32> [#uses=1] +	%5 = mul i32 %i.03, %Bs		; <i32> [#uses=1] +	%6 = getelementptr i16* %B, i32 %5		; <i16*> [#uses=1] +	%7 = load i16* %6, align 2		; <i16> [#uses=1] +	%8 = sext i16 %7 to i32		; <i32> [#uses=1] +	%9 = mul i32 %8, %4		; <i32> [#uses=1] +	%10 = add i32 %9, %sum.04		; <i32> [#uses=2] +	%indvar.next = add i32 %i.03, 1		; <i32> [#uses=2] +	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1] +	br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb + +bb1.bb2_crit_edge:		; preds = %bb +	%phitmp = trunc i32 %10 to i16		; <i16> [#uses=1] +	br label %bb2 + +bb2:		; preds = %entry, %bb1.bb2_crit_edge +	%sum.0.lcssa = phi i16 [ %phitmp, %bb1.bb2_crit_edge ], [ 0, %entry ]		; <i16> [#uses=1] +	store i16 %sum.0.lcssa, i16* %C, align 2 +	ret void +} diff --git a/llvm/test/CodeGen/X86/stack-color-with-reg.ll b/llvm/test/CodeGen/X86/stack-color-with-reg.ll index a8ae8e8168e..672f77eef02 100644 --- a/llvm/test/CodeGen/X86/stack-color-with-reg.ll +++ b/llvm/test/CodeGen/X86/stack-color-with-reg.ll @@ -1,6 +1,6 @@  ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t -; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 8 -; RUN:   grep asm-printer %t   | grep 182 +; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 5 +; RUN:   grep asm-printer %t   | grep 179  	type { [62 x %struct.Bitvec*] }		; type %0  	type { i8* }		; type %1  | 

