diff options
| -rw-r--r-- | llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h | 6 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp | 120 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SimpleRegisterCoalescing.h | 7 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2008-10-16-SpillerBug.ll | 7 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/postra-licm.ll | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/stack-color-with-reg.ll | 4 | 
7 files changed, 88 insertions, 59 deletions
diff --git a/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h b/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h index 8ddcac70599..351217ce583 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -111,6 +111,12 @@ namespace llvm {      double getScaledIntervalSize(LiveInterval& I) {        return (1000.0 * I.getSize()) / indexes_->getIndexesLength();      } + +    /// getFuncInstructionCount - Return the number of instructions in the +    /// current function. +    unsigned getFuncInstructionCount() { +      return indexes_->getFunctionSize(); +    }      /// getApproximateInstructionCount - computes an estimate of the number      /// of instructions in a given LiveInterval. diff --git a/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp b/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp index 1f9d726143a..78011ed67f8 100644 --- a/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -1168,20 +1168,44 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,  /// isWinToJoinCrossClass - Return true if it's profitable to coalesce  /// two virtual registers from different register classes.  bool -SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg, -                                                unsigned SmallReg, -                                                unsigned Threshold) { -  // Then make sure the intervals are *short*. -  LiveInterval &LargeInt = li_->getInterval(LargeReg); -  LiveInterval &SmallInt = li_->getInterval(SmallReg); -  unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt); -  unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt); -  if (LargeSize > Threshold) { -    unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg), -                                       mri_->use_nodbg_end()); -    unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg), -                                       mri_->use_nodbg_end()); -    if (SmallUses*LargeSize < LargeUses*SmallSize) +SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, +                                                unsigned DstReg, +                                             const TargetRegisterClass *SrcRC, +                                             const TargetRegisterClass *DstRC, +                                             const TargetRegisterClass *NewRC) { +  unsigned NewRCCount = allocatableRCRegs_[NewRC].count(); +  // This heuristics is good enough in practice, but it's obviously not *right*. +  // 4 is a magic number that works well enough for x86, ARM, etc. It filter +  // out all but the most restrictive register classes. +  if (NewRCCount > 4 || +      // Early exit if the function is fairly small, coalesce aggressively if +      // that's the case. For really special register classes with 3 or +      // fewer registers, be a bit more careful. +      (li_->getFuncInstructionCount() / NewRCCount) < 8) +    return true; +  LiveInterval &SrcInt = li_->getInterval(SrcReg); +  LiveInterval &DstInt = li_->getInterval(DstReg); +  unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); +  unsigned DstSize = li_->getApproximateInstructionCount(DstInt); +  if (SrcSize <= NewRCCount && DstSize <= NewRCCount) +    return true; +  // Estimate *register use density*. If it doubles or more, abort. +  unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), +                                   mri_->use_nodbg_end()); +  unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg), +                                   mri_->use_nodbg_end()); +  float NewDensity = ((float)(SrcUses + DstUses) / (SrcSize + DstSize)) / +    NewRCCount; +  if (SrcRC != NewRC && SrcSize > NewRCCount) { +    unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count(); +    float Density = ((float)SrcUses / SrcSize) / SrcRCCount; +    if (NewDensity > Density * 2.0f) +      return false; +  } +  if (DstRC != NewRC && DstSize > NewRCCount) { +    unsigned DstRCCount = allocatableRCRegs_[DstRC].count(); +    float Density = ((float)DstUses / DstSize) / DstRCCount; +    if (NewDensity > Density * 2.0f)        return false;    }    return true; @@ -1517,10 +1541,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {            return false;  // Not coalescable          } -        unsigned LargeReg = isExtSubReg ? SrcReg : DstReg; -        unsigned SmallReg = isExtSubReg ? DstReg : SrcReg; -        unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count(); -        if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) { +        if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { +          DEBUG(dbgs() << "\tAvoid coalescing to constrainted register class: " +                       << SrcRC->getName() << "/" +                       << DstRC->getName() << " -> " +                       << NewRC->getName() << ".\n");            Again = true;  // May be possible to coalesce later.            return false;          } @@ -1568,49 +1593,40 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {        }      } -    unsigned LargeReg = SrcReg; -    unsigned SmallReg = DstReg; -      // Now determine the register class of the joined register. -    if (isExtSubReg) { -      if (SubIdx && DstRC && DstRC->isASubClass()) { -        // This is a move to a sub-register class. However, the source is a -        // sub-register of a larger register class. We don't know what should -        // the register class be. FIXME. -        Again = true; -        return false; +    if (!SrcIsPhys && !DstIsPhys) { +      if (isExtSubReg) { +        NewRC = +          SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC; +      } else if (isInsSubReg) { +        NewRC = +          SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC; +      } else { +        NewRC = getCommonSubClass(SrcRC, DstRC);        } -      if (!DstIsPhys && !SrcIsPhys) -        NewRC = SrcRC; -    } else if (!SrcIsPhys && !DstIsPhys) { -      NewRC = getCommonSubClass(SrcRC, DstRC); +        if (!NewRC) {          DEBUG(dbgs() << "\tDisjoint regclasses: "                       << SrcRC->getName() << ", "                       << DstRC->getName() << ".\n");          return false;           // Not coalescable.        } -      if (DstRC->getSize() > SrcRC->getSize()) -        std::swap(LargeReg, SmallReg); -    } -    // If we are joining two virtual registers and the resulting register -    // class is more restrictive (fewer register, smaller size). Check if it's -    // worth doing the merge. -    if (!SrcIsPhys && !DstIsPhys && -        (isExtSubReg || DstRC->isASubClass()) && -        !isWinToJoinCrossClass(LargeReg, SmallReg, -                               allocatableRCRegs_[NewRC].count())) { -      DEBUG(dbgs() << "\tSrc/Dest are different register classes: " -                   << SrcRC->getName() << "/" -                   << DstRC->getName() << " -> " -                   << NewRC->getName() << ".\n"); -      // Allow the coalescer to try again in case either side gets coalesced to -      // a physical register that's compatible with the other side. e.g. -      // r1024 = MOV32to32_ r1025 -      // But later r1024 is assigned EAX then r1025 may be coalesced with EAX. -      Again = true;  // May be possible to coalesce later. -      return false; +      // If we are joining two virtual registers and the resulting register +      // class is more restrictive (fewer register, smaller size). Check if it's +      // worth doing the merge. +      if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { +        DEBUG(dbgs() << "\tAvoid coalescing to constrainted register class: " +                     << SrcRC->getName() << "/" +                     << DstRC->getName() << " -> " +                     << NewRC->getName() << ".\n"); +        // Allow the coalescer to try again in case either side gets coalesced to +        // a physical register that's compatible with the other side. e.g. +        // r1024 = MOV32to32_ r1025 +        // But later r1024 is assigned EAX then r1025 may be coalesced with EAX. +        Again = true;  // May be possible to coalesce later. +        return false; +      }      }    } diff --git a/llvm/lib/CodeGen/SimpleRegisterCoalescing.h b/llvm/lib/CodeGen/SimpleRegisterCoalescing.h index f668064ab08..bd2d0bb90ab 100644 --- a/llvm/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/llvm/lib/CodeGen/SimpleRegisterCoalescing.h @@ -179,8 +179,11 @@ namespace llvm {      /// isWinToJoinCrossClass - Return true if it's profitable to coalesce      /// two virtual registers from different register classes. -    bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg, -                               unsigned Threshold); +    bool isWinToJoinCrossClass(unsigned SrcReg, +                               unsigned DstReg, +                               const TargetRegisterClass *SrcRC, +                               const TargetRegisterClass *DstRC, +                               const TargetRegisterClass *NewRC);      /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual      /// register with a physical register, check if any of the virtual register diff --git a/llvm/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/llvm/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll index 2b209319792..6c453499a43 100644 --- a/llvm/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll +++ b/llvm/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 3 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 1  define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {  entry: diff --git a/llvm/test/CodeGen/X86/2008-10-16-SpillerBug.ll b/llvm/test/CodeGen/X86/2008-10-16-SpillerBug.ll index b8ca364d179..f811230ce43 100644 --- a/llvm/test/CodeGen/X86/2008-10-16-SpillerBug.ll +++ b/llvm/test/CodeGen/X86/2008-10-16-SpillerBug.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edi} +; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 40 +; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | FileCheck %s  	%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }  	%struct.XXDAlphaTest = type { float, i16, i8, i8 } @@ -61,11 +62,15 @@  define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind {  entry: +; CHECK: t: +; CHECK: xorl %ecx, %ecx  	%0 = trunc i64 %key_token to i32		; <i32> [#uses=1]  	%1 = getelementptr %struct.YYToken* %pstrm, i32 %0		; <%struct.YYToken*> [#uses=5]  	br label %bb1132  bb51:		; preds = %bb1132 +; CHECK: .align 4 +; CHECK: andl $7  	%2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0		; <i16*> [#uses=1]  	%3 = load i16* %2, align 1		; <i16> [#uses=3]  	%4 = lshr i16 %3, 6		; <i16> [#uses=1] diff --git a/llvm/test/CodeGen/X86/postra-licm.ll b/llvm/test/CodeGen/X86/postra-licm.ll index db7a9eccbec..97cc7b4977c 100644 --- a/llvm/test/CodeGen/X86/postra-licm.ll +++ b/llvm/test/CodeGen/X86/postra-licm.ll @@ -149,7 +149,6 @@ entry:  bb.nph:                                           ; preds = %entry  ; X86-64: movq _map_4_to_16@GOTPCREL(%rip) -; X86-64: movq _map_4_to_16@GOTPCREL(%rip)  ; X86-64: .align 4    %tmp5 = zext i32 undef to i64                   ; <i64> [#uses=1]    %tmp6 = add i64 %tmp5, 1                        ; <i64> [#uses=1] diff --git a/llvm/test/CodeGen/X86/stack-color-with-reg.ll b/llvm/test/CodeGen/X86/stack-color-with-reg.ll index 83f56c10bb7..001a5409640 100644 --- a/llvm/test/CodeGen/X86/stack-color-with-reg.ll +++ b/llvm/test/CodeGen/X86/stack-color-with-reg.ll @@ -1,6 +1,6 @@  ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t -; RUN:   grep asm-printer %t | grep 156 -; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 4 +; RUN:   grep asm-printer %t | grep 166 +; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 5  	type { [62 x %struct.Bitvec*] }		; type %0  	type { i8* }		; type %1  | 

