diff options
| -rw-r--r-- | llvm/include/llvm/Target/TargetRegisterInfo.h | 9 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp | 31 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/VirtRegRewriter.cpp | 39 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86RegisterInfo.cpp | 78 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86RegisterInfo.h | 7 | ||||
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll | 5 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/coalescer-cross.ll | 41 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/stack-color-with-reg.ll | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/x86-64-sret-return.ll | 2 | 
10 files changed, 198 insertions, 19 deletions
diff --git a/llvm/include/llvm/Target/TargetRegisterInfo.h b/llvm/include/llvm/Target/TargetRegisterInfo.h index 29f96e961ab..9cd8489c803 100644 --- a/llvm/include/llvm/Target/TargetRegisterInfo.h +++ b/llvm/include/llvm/Target/TargetRegisterInfo.h @@ -484,6 +484,15 @@ public:      return 0;    } +  /// getMatchingSuperRegClass - Return a subclass of the specified register +  /// class A so that each register in it has a sub-register of the +  /// specified sub-register index which is in the specified register class B. +  virtual const TargetRegisterClass * +  getMatchingSuperRegClass(const TargetRegisterClass *A, +                           const TargetRegisterClass *B, unsigned Idx) const { +    return 0; +  } +    //===--------------------------------------------------------------------===//    // Register Class Information    // diff --git a/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp b/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp index 7cb14e93123..9f8d1bad34b 100644 --- a/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/llvm/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -59,7 +59,7 @@ NewHeuristic("new-coalescer-heuristic",  static cl::opt<bool>  CrossClassJoin("join-cross-class-copies",                 cl::desc("Coalesce cross register class copies"), -               cl::init(false), cl::Hidden); +               cl::init(true), cl::Hidden);  static cl::opt<bool>  PhysJoinTweak("tweak-phys-join-heuristics", @@ -1308,6 +1308,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {    // Should be non-null only when coalescing to a sub-register class.    bool CrossRC = false; +  const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); +  const TargetRegisterClass *DstRC= DstIsPhys ? 
0 : mri_->getRegClass(DstReg);    const TargetRegisterClass *NewRC = NULL;    MachineBasicBlock *CopyMBB = CopyMI->getParent();    unsigned RealDstReg = 0; @@ -1373,6 +1375,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {          }        }        if (SubIdx) { +        if (isInsSubReg || isSubRegToReg) { +          if (!DstIsPhys && !SrcIsPhys) { +            NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx); +            if (!NewRC) +              return false; +          } +        }          unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;          unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;          unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count(); @@ -1424,11 +1433,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {        }      } -    const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); -    const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);      unsigned LargeReg = SrcReg;      unsigned SmallReg = DstReg; -    unsigned Limit = 0;      // Now determine the register class of the joined register.      if (isExtSubReg) { @@ -1439,7 +1445,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {          Again = true;          return false;        } -      Limit = allocatableRCRegs_[DstRC].count(); +      if (!DstIsPhys && !SrcIsPhys) +        NewRC = SrcRC;      } else if (!SrcIsPhys && !DstIsPhys) {        NewRC = getCommonSubClass(SrcRC, DstRC);        if (!NewRC) { @@ -1643,11 +1650,15 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {    // Coalescing to a virtual register that is of a sub-register class of the    // other. Make sure the resulting register is set to the right register class. 
-  if (CrossRC) { -      ++numCrossRCs; -    if (NewRC) -      mri_->setRegClass(DstReg, NewRC); -  } +  if (CrossRC) +    ++numCrossRCs; + +  // This may happen even if it's cross-rc coalescing. e.g. +  // %reg1026<def> = SUBREG_TO_REG 0, %reg1037<kill>, 4 +  // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have to +  // be allocated a register from GR64_ABCD. +  if (NewRC) +    mri_->setRegClass(DstReg, NewRC);    if (NewHeuristic) {      // Add all copies that define val# in the source interval into the queue. diff --git a/llvm/lib/CodeGen/VirtRegRewriter.cpp b/llvm/lib/CodeGen/VirtRegRewriter.cpp index abaa8bd212e..9537c05d820 100644 --- a/llvm/lib/CodeGen/VirtRegRewriter.cpp +++ b/llvm/lib/CodeGen/VirtRegRewriter.cpp @@ -1344,6 +1344,31 @@ private:      ++NumStores;    } +  /// isSafeToDelete - Return true if this instruction doesn't produce any side +  /// effect and all of its defs are dead. +  static bool isSafeToDelete(MachineInstr &MI) { +    const TargetInstrDesc &TID = MI.getDesc(); +    if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() || +        TID.isBarrier() || TID.isReturn() || +        TID.hasUnmodeledSideEffects()) +      return false; +    if (TID.getImplicitDefs()) +      return false; +    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { +      MachineOperand &MO = MI.getOperand(i); +      if (!MO.isReg() || !MO.getReg()) +        continue; +      if (MO.isDef() && !MO.isDead()) +        return false; +      if (MO.isUse() && MO.isKill()) +        // FIXME: We can't remove kill markers or else the scavenger will assert. +        // An alternative is to add an ADD pseudo instruction to replace kill +        // markers. +        return false; +    } +    return true; +  } +    /// TransferDeadness - A identity copy definition is dead and it's being    /// removed. Find the last def or use and mark it as dead / kill.    
void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist, @@ -1385,9 +1410,7 @@ private:        if (LastUD->isDef()) {          // If the instruction has no side effect, delete it and propagate          // backward further. Otherwise, mark is dead and we are done. -        const TargetInstrDesc &TID = LastUDMI->getDesc(); -        if (TID.mayStore() || TID.isCall() || TID.isTerminator() || -            TID.hasUnmodeledSideEffects()) { +        if (!isSafeToDelete(*LastUDMI)) {            LastUD->setIsDead();            break;          } @@ -2170,7 +2193,15 @@ private:          }            }      ProcessNextInst: -      DistanceMap.insert(std::make_pair(&MI, Dist++)); +      // Delete dead instructions without side effects. +      if (!Erased && !BackTracked && isSafeToDelete(MI)) { +        InvalidateKills(MI, TRI, RegKills, KillOps); +        VRM.RemoveMachineInstrFromMaps(&MI); +        MBB.erase(&MI); +        Erased = true; +      } +      if (!Erased) +        DistanceMap.insert(std::make_pair(&MI, Dist++));        if (!Erased && !BackTracked) {          for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)            UpdateKills(*II, TRI, RegKills, KillOps); diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 437986fb9c3..0dc63ef9a61 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -152,6 +152,84 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {    }  } +const TargetRegisterClass * +X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, +                                          const TargetRegisterClass *B, +                                          unsigned SubIdx) const { +  switch (SubIdx) { +  default: return 0; // Unhandled sub-register index. +  case 1: +    // 8-bit +    if (B == &X86::GR8RegClass) { +      if (A == &X86::GR64RegClass) +        return &X86::GR64RegClass; +      else if (A == &X86::GR32RegClass) +        return 
&X86::GR32RegClass; +      else if (A == &X86::GR16RegClass) +        return &X86::GR16RegClass; +    } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) { +      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass) +        return &X86::GR64_ABCDRegClass; +      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass) +        return &X86::GR32_ABCDRegClass; +      else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass) +        return &X86::GR16_ABCDRegClass; +    } else if (B == &X86::GR8_NOREXRegClass) { +      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass) +        return &X86::GR64_NOREXRegClass; +      else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass) +        return &X86::GR32_NOREXRegClass; +      else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass) +        return &X86::GR16_NOREXRegClass; +    } +    break; +  case 2: +    // 8-bit hi +    if (B == &X86::GR8_ABCD_HRegClass) { +      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass) +        return &X86::GR64_ABCDRegClass; +      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass) +        return &X86::GR32_ABCDRegClass; +      else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass) +        return &X86::GR16_ABCDRegClass; +    } +    break; +  case 3: +    // 16-bit +    if (B == &X86::GR16RegClass) { +      if (A == &X86::GR64RegClass) +        return &X86::GR64RegClass; +      else if (A == &X86::GR32RegClass) +        return &X86::GR32RegClass; +    } else if (B == &X86::GR16_ABCDRegClass) { +      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass) +        return &X86::GR64_ABCDRegClass; +      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass) +        return &X86::GR32_ABCDRegClass; +    } else if (B == &X86::GR16_NOREXRegClass) { +      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass) +        return &X86::GR64_NOREXRegClass; +     
 else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass) +        return &X86::GR32_NOREXRegClass; +    } +    break; +  case 4: +    // 32-bit +    if (B == &X86::GR32RegClass) { +      if (A == &X86::GR64RegClass) +        return &X86::GR64RegClass; +    } else if (B == &X86::GR32_ABCDRegClass) { +      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass) +        return &X86::GR64_ABCDRegClass; +    } else if (B == &X86::GR32_NOREXRegClass) { +      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass) +        return &X86::GR64_NOREXRegClass; +    } +    break; +  } +  return 0; // No matching class for this (A, B, SubIdx). +} +  const TargetRegisterClass *X86RegisterInfo::getPointerRegClass() const {    const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();    if (Subtarget->is64Bit()) diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h index eac8426a980..702e69dee26 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/llvm/lib/Target/X86/X86RegisterInfo.h @@ -93,6 +93,13 @@ public:    /// Code Generation virtual methods...    ///  +  /// getMatchingSuperRegClass - Return a subclass of the specified register +  /// class A so that each register in it has a sub-register of the +  /// specified sub-register index which is in the specified register class B. +  virtual const TargetRegisterClass * +  getMatchingSuperRegClass(const TargetRegisterClass *A, +                           const TargetRegisterClass *B, unsigned Idx) const; +    /// getPointerRegClass - Returns a TargetRegisterClass used for pointer    /// values.    
const TargetRegisterClass *getPointerRegClass() const; diff --git a/llvm/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/llvm/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll index 6c86764073e..1af07d4130a 100644 --- a/llvm/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll +++ b/llvm/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc | grep 328 +; RUN: llvm-as < %s | llc | grep 168  target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"  target triple = "s390x-linux" diff --git a/llvm/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll b/llvm/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll index b5635b38cfc..bec43f0fbb7 100644 --- a/llvm/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll +++ b/llvm/test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll @@ -1,4 +1,5 @@ -; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | grep movl | count 1 +; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | grep movl | count 2 +; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=att | not grep movb  	%struct.double_int = type { i64, i64 }  	%struct.tree_common = type <{ i8, [3 x i8] }> @@ -6,7 +7,7 @@  	%struct.tree_node = type { %struct.tree_int_cst }  @tree_code_type = external constant [0 x i32]		; <[0 x i32]*> [#uses=1] -define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) { +define i32 @simple_cst_equal(%struct.tree_node* %t1, %struct.tree_node* %t2) nounwind {  entry:  	%tmp2526 = bitcast %struct.tree_node* %t1 to i32*		; <i32*> [#uses=1]  	br i1 false, label %UnifiedReturnBlock, label %bb21 diff --git a/llvm/test/CodeGen/X86/coalescer-cross.ll b/llvm/test/CodeGen/X86/coalescer-cross.ll new file mode 100644 index 00000000000..1da214c6103 --- /dev/null +++ b/llvm/test/CodeGen/X86/coalescer-cross.ll @@ -0,0 +1,41 @@ +; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin10 | not grep movaps +; rdar://6509240 + +	type { %struct.TValue }		
; type %0 +	type { %struct.L_Umaxalign, i32, %struct.Node* }		; type %1 +	%struct.CallInfo = type { %struct.TValue*, %struct.TValue*, %struct.TValue*, i32*, i32, i32 } +	%struct.GCObject = type { %struct.lua_State } +	%struct.L_Umaxalign = type { double } +	%struct.Mbuffer = type { i8*, i32, i32 } +	%struct.Node = type { %struct.TValue, %struct.TKey } +	%struct.TKey = type { %1 } +	%struct.TString = type { %struct.anon } +	%struct.TValue = type { %struct.L_Umaxalign, i32 } +	%struct.Table = type { %struct.GCObject*, i8, i8, i8, i8, %struct.Table*, %struct.TValue*, %struct.Node*, %struct.Node*, %struct.GCObject*, i32 } +	%struct.UpVal = type { %struct.GCObject*, i8, i8, %struct.TValue*, %0 } +	%struct.anon = type { %struct.GCObject*, i8, i8, i8, i32, i32 } +	%struct.global_State = type { %struct.stringtable, i8* (i8*, i8*, i32, i32)*, i8*, i8, i8, i32, %struct.GCObject*, %struct.GCObject**, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.GCObject*, %struct.Mbuffer, i32, i32, i32, i32, i32, i32, i32 (%struct.lua_State*)*, %struct.TValue, %struct.lua_State*, %struct.UpVal, [9 x %struct.Table*], [17 x %struct.TString*] } +	%struct.lua_Debug = type { i32, i8*, i8*, i8*, i8*, i32, i32, i32, i32, [60 x i8], i32 } +	%struct.lua_State = type { %struct.GCObject*, i8, i8, i8, %struct.TValue*, %struct.TValue*, %struct.global_State*, %struct.CallInfo*, i32*, %struct.TValue*, %struct.TValue*, %struct.CallInfo*, %struct.CallInfo*, i32, i32, i16, i16, i8, i8, i32, i32, void (%struct.lua_State*, %struct.lua_Debug*)*, %struct.TValue, %struct.TValue, %struct.GCObject*, %struct.GCObject*, %struct.lua_longjmp*, i32 } +	%struct.lua_longjmp = type { %struct.lua_longjmp*, [18 x i32], i32 } +	%struct.stringtable = type { %struct.GCObject**, i32, i32 } +@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (%struct.lua_State*)* @os_clock to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0] + +define i32 @os_clock(%struct.lua_State* nocapture %L) nounwind ssp { 
+entry: +	%0 = tail call i32 @"\01_clock$UNIX2003"() nounwind		; <i32> [#uses=1] +	%1 = uitofp i32 %0 to double		; <double> [#uses=1] +	%2 = fdiv double %1, 1.000000e+06		; <double> [#uses=1] +	%3 = getelementptr %struct.lua_State* %L, i32 0, i32 4		; <%struct.TValue**> [#uses=3] +	%4 = load %struct.TValue** %3, align 4		; <%struct.TValue*> [#uses=2] +	%5 = getelementptr %struct.TValue* %4, i32 0, i32 0, i32 0		; <double*> [#uses=1] +	store double %2, double* %5, align 4 +	%6 = getelementptr %struct.TValue* %4, i32 0, i32 1		; <i32*> [#uses=1] +	store i32 3, i32* %6, align 4 +	%7 = load %struct.TValue** %3, align 4		; <%struct.TValue*> [#uses=1] +	%8 = getelementptr %struct.TValue* %7, i32 1		; <%struct.TValue*> [#uses=1] +	store %struct.TValue* %8, %struct.TValue** %3, align 4 +	ret i32 1 +} + +declare i32 @"\01_clock$UNIX2003"() diff --git a/llvm/test/CodeGen/X86/stack-color-with-reg.ll b/llvm/test/CodeGen/X86/stack-color-with-reg.ll index 72a985a6c29..74326a382c4 100644 --- a/llvm/test/CodeGen/X86/stack-color-with-reg.ll +++ b/llvm/test/CodeGen/X86/stack-color-with-reg.ll @@ -1,6 +1,7 @@  ; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t  ; RUN:   grep stackcoloring %t | grep "loads eliminated"  -; RUN:   grep stackcoloring %t | grep "stores eliminated" +; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 5 +; RUN:   grep asm-printer %t   | grep 176  	type { [62 x %struct.Bitvec*] }		; type %0  	type { i8* }		; type %1 diff --git a/llvm/test/CodeGen/X86/x86-64-sret-return.ll b/llvm/test/CodeGen/X86/x86-64-sret-return.ll index 9298661998b..458030c2791 100644 --- a/llvm/test/CodeGen/X86/x86-64-sret-return.ll +++ b/llvm/test/CodeGen/X86/x86-64-sret-return.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc | grep {movq	%rdi, %rax} +; RUN: llvm-as < %s | llc  target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"  target triple = "x86_64-apple-darwin8"  | 

