Diffstat (limited to 'llvm')
-rw-r--r--   llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h       5
-rw-r--r--   llvm/lib/CodeGen/LiveIntervalAnalysis.cpp            161
-rw-r--r--   llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll    81
3 files changed, 170 insertions, 77 deletions
diff --git a/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h b/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 0318def5a0c..725beb708c5 100644
--- a/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -274,8 +274,9 @@ namespace llvm {
     /// MI. If it is successul, MI is updated with the newly created MI and
     /// returns true.
     bool tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm,
-                              MachineInstr *DefMI, unsigned index, unsigned i,
-                              bool isSS, int slot, unsigned reg);
+                              MachineInstr *DefMI, unsigned InstrIdx,
+                              unsigned OpIdx, unsigned NumUses,
+                              bool isSS, int Slot, unsigned Reg);
 
     /// anyKillInMBBAfterIdx - Returns true if there is a kill of the specified
     /// VNInfo that's after the specified index but is within the basic block.
diff --git a/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index 7d627bb6b2a..f902f4be962 100644
--- a/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -642,13 +642,22 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
 /// MI. If it is successul, MI is updated with the newly created MI and
 /// returns true.
 bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
-                                         VirtRegMap &vrm,
-                                         MachineInstr *DefMI,
-                                         unsigned index, unsigned i,
-                                         bool isSS, int slot, unsigned reg) {
+                                         VirtRegMap &vrm, MachineInstr *DefMI,
+                                         unsigned InstrIdx, unsigned OpIdx,
+                                         unsigned NumUses,
+                                         bool isSS, int Slot, unsigned Reg) {
+  // FIXME: fold subreg use
+  if (MI->getOperand(OpIdx).getSubReg())
+    return false;
+
+  // FIXME: It may be possible to fold load when there are multiple uses.
+  // e.g. On x86, TEST32rr r, r -> CMP32rm [mem], 0
+  if (NumUses > 1)
+    return false;
+
   MachineInstr *fmi = isSS
-    ? mri_->foldMemoryOperand(MI, i, slot)
-    : mri_->foldMemoryOperand(MI, i, DefMI);
+    ? mri_->foldMemoryOperand(MI, OpIdx, Slot)
+    : mri_->foldMemoryOperand(MI, OpIdx, DefMI);
   if (fmi) {
     // Attempt to fold the memory reference into the instruction. If
     // we can do this, we don't need to insert spill code.
@@ -657,15 +666,13 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
     else
       LiveVariables::transferKillDeadInfo(MI, fmi, mri_);
     MachineBasicBlock &MBB = *MI->getParent();
-    if (isSS) {
-      if (!mf_->getFrameInfo()->isFixedObjectIndex(slot))
-        vrm.virtFolded(reg, MI, i, fmi);
-    }
+    if (isSS && !mf_->getFrameInfo()->isFixedObjectIndex(Slot))
+      vrm.virtFolded(Reg, MI, OpIdx, fmi);
     vrm.transferSpillPts(MI, fmi);
     vrm.transferRestorePts(MI, fmi);
     mi2iMap_.erase(MI);
-    i2miMap_[index/InstrSlots::NUM] = fmi;
-    mi2iMap_[fmi] = index;
+    i2miMap_[InstrIdx /InstrSlots::NUM] = fmi;
+    mi2iMap_[fmi] = InstrIdx;
     MI = MBB.insert(MBB.erase(MI), fmi);
     ++numFolds;
     return true;
@@ -714,8 +721,6 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
     unsigned RegI = Reg;
     if (Reg == 0 || MRegisterInfo::isPhysicalRegister(Reg))
       continue;
-    unsigned SubIdx = mop.getSubReg();
-    bool isSubReg = SubIdx != 0;
     if (Reg != li.reg)
       continue;
 
@@ -726,6 +731,8 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
       // If this is the rematerializable definition MI itself and
       // all of its uses are rematerialized, simply delete it.
       if (MI == ReMatOrigDefMI && CanDelete) {
+        DOUT << "\t\t\t\tErasing re-materlizable def: ";
+        DOUT << MI << '\n';
         RemoveMachineInstrFromMaps(MI);
         vrm.RemoveMachineInstrFromMaps(MI);
         MI->eraseFromParent();
@@ -747,23 +754,6 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
     if (TryFold)
       TryFold = !TrySplit && NewVReg == 0;
 
-    // FIXME: fold subreg use
-    if (!isSubReg && TryFold &&
-        tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, i, FoldSS, FoldSlot,
-                             Reg))
-      // Folding the load/store can completely change the instruction in
-      // unpredictable ways, rescan it from the beginning.
-      goto RestartInstruction;
-
-    // Create a new virtual register for the spill interval.
-    bool CreatedNewVReg = false;
-    if (NewVReg == 0) {
-      NewVReg = RegMap->createVirtualRegister(rc);
-      vrm.grow();
-      CreatedNewVReg = true;
-    }
-    mop.setReg(NewVReg);
-
     // Scan all of the operands of this instruction rewriting operands
     // to use NewVReg instead of li.reg as appropriate.  We do this for
     // two reasons:
@@ -775,9 +765,11 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
     //
     // Keep track of whether we replace a use and/or def so that we can
     // create the spill interval with the appropriate range.
-
+
     HasUse = mop.isUse();
     HasDef = mop.isDef();
+    unsigned NumUses = HasUse;
+    std::vector<unsigned> UpdateOps;
     for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
       if (!MI->getOperand(j).isRegister())
         continue;
@@ -785,12 +777,37 @@
       if (RegJ == 0 || MRegisterInfo::isPhysicalRegister(RegJ))
         continue;
       if (RegJ == RegI) {
-        MI->getOperand(j).setReg(NewVReg);
+        UpdateOps.push_back(j);
+        if (MI->getOperand(j).isUse())
+          ++NumUses;
         HasUse |= MI->getOperand(j).isUse();
         HasDef |= MI->getOperand(j).isDef();
       }
     }
 
+    if (TryFold &&
+        tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, i,
+                             NumUses, FoldSS, FoldSlot, Reg)) {
+      // Folding the load/store can completely change the instruction in
+      // unpredictable ways, rescan it from the beginning.
+      HasUse = false;
+      HasDef = false;
+      goto RestartInstruction;
+    }
+
+    // Create a new virtual register for the spill interval.
+    bool CreatedNewVReg = false;
+    if (NewVReg == 0) {
+      NewVReg = RegMap->createVirtualRegister(rc);
+      vrm.grow();
+      CreatedNewVReg = true;
+    }
+    mop.setReg(NewVReg);
+
+    // Reuse NewVReg for other reads.
+    for (unsigned j = 0, e = UpdateOps.size(); j != e; ++j)
+      MI->getOperand(UpdateOps[j]).setReg(NewVReg);
+
     if (CreatedNewVReg) {
       if (DefIsReMat) {
         vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI/*, CanDelete*/);
@@ -1197,46 +1214,44 @@ addIntervalsForSpills(const LiveInterval &li,
       for (unsigned i = 0, e = spills.size(); i != e; ++i) {
         int index = spills[i].index;
         unsigned VReg = spills[i].vreg;
-        bool DoFold = spills[i].canFold;
         bool isReMat = vrm.isReMaterialized(VReg);
         MachineInstr *MI = getInstructionFromIndex(index);
         int OpIdx = -1;
-        bool FoldedLoad = false;
-        if (DoFold) {
+        unsigned NumUses = 0;
+        if (spills[i].canFold) {
           for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
             MachineOperand &MO = MI->getOperand(j);
             if (!MO.isRegister() || MO.getReg() != VReg)
               continue;
-            if (MO.isUse()) {
-              // Can't fold if it's two-address code and the use isn't the
-              // first and only use.
-              // If there are more than one uses, a load is still needed.
-              if (!isReMat && !FoldedLoad &&
-                  alsoFoldARestore(Id, index,VReg,RestoreMBBs,RestoreIdxes)) {
-                FoldedLoad = true;
-                continue;
-              } else {
-                OpIdx = -1;
-                break;
-              }
+            if (MO.isDef()) {
+              OpIdx = (int)j;
+              continue;
             }
-            OpIdx = (int)j;
+            // Can't fold if it's two-address code and the use isn't the
+            // first and only use.
+            if (isReMat ||
+                (NumUses == 0 && !alsoFoldARestore(Id, index, VReg, RestoreMBBs,
+                                                   RestoreIdxes))) {
+              OpIdx = -1;
+              break;
+            }
+            ++NumUses;
           }
         }
         // Fold the store into the def if possible.
-        if (OpIdx == -1)
-          DoFold = false;
-        if (DoFold) {
-          if (tryFoldMemoryOperand(MI, vrm, NULL, index,OpIdx,true,Slot,VReg)) {
-            if (FoldedLoad)
+        bool Folded = false;
+        if (OpIdx != -1) {
+          if (tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx, NumUses,
+                                   true, Slot, VReg)) {
+            if (NumUses)
               // Folded a two-address instruction, do not issue a load.
               eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
-          } else
-            DoFold = false;
+            Folded = true;
+          }
         }
         // Else tell the spiller to issue a store for us.
-        if (!DoFold)
+        if (!Folded)
           vrm.addSpillPoint(VReg, MI);
       }
       Id = SpillMBBs.find_next(Id);
@@ -1251,32 +1266,30 @@
       if (index == -1)
         continue;
       unsigned VReg = restores[i].vreg;
-      bool DoFold = restores[i].canFold;
       MachineInstr *MI = getInstructionFromIndex(index);
+      unsigned NumUses = 0;
       int OpIdx = -1;
-      if (DoFold) {
+      if (restores[i].canFold) {
         for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
           MachineOperand &MO = MI->getOperand(j);
           if (!MO.isRegister() || MO.getReg() != VReg)
             continue;
           if (MO.isDef()) {
-            // Can't fold if it's two-address code.
+            // Can't fold if it's two-address code and it hasn't already
+            // been folded.
             OpIdx = -1;
             break;
           }
-          if (OpIdx != -1) {
-            // Multiple uses, do not fold!
-            OpIdx = -1;
-            break;
-          }
-          OpIdx = (int)j;
+          if (NumUses == 0)
+            // Use the first use index.
+            OpIdx = (int)j;
+          ++NumUses;
         }
       }
 
       // Fold the load into the use if possible.
-      if (OpIdx == -1)
-        DoFold = false;
-      if (DoFold) {
+      bool Folded = false;
+      if (OpIdx != -1) {
         if (vrm.isReMaterialized(VReg)) {
           MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
           int LdSlot = 0;
@@ -1284,17 +1297,15 @@
           // If the rematerializable def is a load, also try to fold it.
           if (isLoadSS ||
               (ReMatDefMI->getInstrDescriptor()->Flags & M_LOAD_FLAG))
-            DoFold = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, OpIdx,
-                                          isLoadSS, LdSlot, VReg);
-          else
-            DoFold = false;
+            Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, OpIdx,
+                                          NumUses, isLoadSS, LdSlot, VReg);
         } else
-          DoFold = tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx,
+          Folded = tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx, NumUses,
                                         true, Slot, VReg);
       }
       // If folding is not possible / failed, then tell the spiller to issue a
       // load / rematerialization for us.
-      if (!DoFold)
+      if (!Folded)
         vrm.addRestorePoint(VReg, MI);
     }
     Id = RestoreMBBs.find_next(Id);
diff --git a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
new file mode 100644
index 00000000000..c123eccef30
--- /dev/null
+++ b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@@ -0,0 +1,81 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& \
+; RUN:   grep {1 .*folded into instructions}
+
+declare fastcc void @rdft(i32, i32, double*, i32*, double*)
+
+define fastcc void @mp_sqrt(i32 %n, i32 %radix, i32* %in, i32* %out, i32* %tmp1, i32* %tmp2, i32 %nfft, double* %tmp1fft, double* %tmp2fft, i32* %ip, double* %w) {
+entry:
+	br label %bb.i5
+
+bb.i5:		; preds = %bb.i5, %entry
+	%nfft_init.0.i = phi i32 [ 1, %entry ], [ %tmp7.i3, %bb.i5 ]		; <i32> [#uses=1]
+	%tmp7.i3 = shl i32 %nfft_init.0.i, 1		; <i32> [#uses=2]
+	br i1 false, label %bb.i5, label %mp_unexp_mp2d.exit.i
+
+mp_unexp_mp2d.exit.i:		; preds = %bb.i5
+	br i1 false, label %cond_next.i, label %cond_true.i
+
+cond_true.i:		; preds = %mp_unexp_mp2d.exit.i
+	ret void
+
+cond_next.i:		; preds = %mp_unexp_mp2d.exit.i
+	%tmp22.i = sdiv i32 0, 2		; <i32> [#uses=2]
+	br i1 false, label %cond_true29.i, label %cond_next36.i
+
+cond_true29.i:		; preds = %cond_next.i
+	ret void
+
+cond_next36.i:		; preds = %cond_next.i
+	store i32 %tmp22.i, i32* null, align 4
+	%tmp8.i14.i = select i1 false, i32 1, i32 0		; <i32> [#uses=1]
+	br label %bb.i28.i
+
+bb.i28.i:		; preds = %bb.i28.i, %cond_next36.i
+	%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ]		; <i32> [#uses=2]
+	%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ]		; <double> [#uses=1]
+	%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32		; <i32> [#uses=2]
+	%tmp4.i19.i = icmp slt i32 %tmp1.i18.i, %radix		; <i1> [#uses=1]
+	%x.0.i21.i = select i1 %tmp4.i19.i, i32 %tmp1.i18.i, i32 0		; <i32> [#uses=1]
+	%tmp41.sum.i = add i32 %j.0.reg2mem.0.i16.i, 2		; <i32> [#uses=0]
+	%tmp1213.i23.i = sitofp i32 %x.0.i21.i to double		; <double> [#uses=1]
+	%tmp15.i24.i = sub double 0.000000e+00, %tmp1213.i23.i		; <double> [#uses=1]
+	%tmp16.i25.i = mul double 0.000000e+00, %tmp15.i24.i		; <double> [#uses=1]
+	%indvar.next39.i = add i32 %j.0.reg2mem.0.i16.i, 1		; <i32> [#uses=2]
+	%exitcond40.i = icmp eq i32 %indvar.next39.i, %tmp8.i14.i		; <i1> [#uses=1]
+	br i1 %exitcond40.i, label %mp_unexp_d2mp.exit29.i, label %bb.i28.i
+
+mp_unexp_d2mp.exit29.i:		; preds = %bb.i28.i
+	%tmp46.i = sub i32 0, %tmp22.i		; <i32> [#uses=1]
+	store i32 %tmp46.i, i32* null, align 4
+	br i1 false, label %bb.i.i, label %mp_sqrt_init.exit
+
+bb.i.i:		; preds = %bb.i.i, %mp_unexp_d2mp.exit29.i
+	br label %bb.i.i
+
+mp_sqrt_init.exit:		; preds = %mp_unexp_d2mp.exit29.i
+	tail call fastcc void @mp_mul_csqu( i32 0, double* %tmp1fft )
+	tail call fastcc void @rdft( i32 0, i32 -1, double* null, i32* %ip, double* %w )
+	tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 0, double* %tmp1fft, i32* %tmp2 )
+	br i1 false, label %cond_false.i, label %cond_true36.i
+
+cond_true36.i:		; preds = %mp_sqrt_init.exit
+	ret void
+
+cond_false.i:		; preds = %mp_sqrt_init.exit
+	tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, i32* %out )
+	tail call fastcc void @mp_add( i32 0, i32 %radix, i32* %tmp1, i32* %tmp2, i32* %tmp1 )
+	tail call fastcc void @mp_sub( i32 0, i32 %radix, i32* %in, i32* %tmp2, i32* %tmp2 )
+	tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, i32* %tmp1 )
+	tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 %tmp7.i3, double* %tmp2fft, i32* %tmp2 )
+	ret void
+}
+
+declare fastcc void @mp_add(i32, i32, i32*, i32*, i32*)
+
+declare fastcc void @mp_sub(i32, i32, i32*, i32*, i32*)
+
+declare fastcc void @mp_round(i32, i32, i32, i32*)
+
+declare fastcc void @mp_mul_csqu(i32, double*)
+
+declare fastcc void @mp_mul_d2i(i32, i32, i32, double*, i32*)
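
The heart of the patch is the NumUses bookkeeping threaded through tryFoldMemoryOperand: folding is refused when the spilled register is read through a sub-register or more than once, while a two-address instruction with a single use can still have the store folded into its def (and the matching reload erased). The following is a minimal, self-contained C++ sketch of that gate; Operand, Instr, countUses, and canTryFold are illustrative stand-ins, not LLVM's real classes.

#include <cstdio>
#include <vector>

// Illustrative stand-ins for MachineOperand / MachineInstr.
struct Operand { unsigned Reg; bool IsDef; unsigned SubReg; };
struct Instr   { std::vector<Operand> Ops; };

// Count how many operands of MI read Reg, mirroring the NumUses
// bookkeeping the patch adds in rewriteInstructionForSpills.
static unsigned countUses(const Instr &MI, unsigned Reg) {
  unsigned NumUses = 0;
  for (const Operand &Op : MI.Ops)
    if (Op.Reg == Reg && !Op.IsDef)
      ++NumUses;
  return NumUses;
}

// The two early bail-outs tryFoldMemoryOperand now performs before
// asking the target to fold: no sub-register operand, at most one use.
static bool canTryFold(const Instr &MI, unsigned OpIdx, unsigned Reg) {
  if (MI.Ops[OpIdx].SubReg != 0)
    return false;                  // "FIXME: fold subreg use"
  return countUses(MI, Reg) <= 1;  // more than one use still needs a load
}

int main() {
  // Two reads of the same register (think TEST32rr %r, %r): not foldable
  // yet, though the patch's FIXME notes it could become CMP32rm [mem], 0.
  Instr Test{{{1, false, 0}, {1, false, 0}}};
  // Two-address style def plus a single use (r = r op x): a fold candidate.
  Instr Add{{{1, true, 0}, {1, false, 0}, {2, false, 0}}};
  std::printf("test foldable: %d\n", canTryFold(Test, 0, 1)); // prints 0
  std::printf("add  foldable: %d\n", canTryFold(Add, 1, 1));  // prints 1
  return 0;
}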

