diff options
| -rw-r--r-- | llvm/lib/CodeGen/RegAllocGreedy.cpp | 41 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SplitKit.h | 17 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/spill-before-exec.mir | 100 | 
3 files changed, 149 insertions, 9 deletions
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 54f800832d2..64da8485389 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -449,8 +449,8 @@ private:    BlockFrequency calcSpillCost();    bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); -  void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); -  void growRegion(GlobalSplitCandidate &Cand); +  bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); +  bool growRegion(GlobalSplitCandidate &Cand);    bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand,                                    unsigned BBNumber,                                    const AllocationOrder &Order); @@ -1203,6 +1203,13 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,        } else if (Intf.first() < BI.LastInstr) {          ++Ins;        } + +      // Abort if the spill cannot be inserted at the MBB' start +      if (((BC.Entry == SpillPlacement::MustSpill) || +           (BC.Entry == SpillPlacement::PrefSpill)) && +          SlotIndex::isEarlierInstr(BI.FirstInstr, +                                    SA->getFirstSplitPoint(BC.Number))) +        return false;      }      // Interference for the live-out value. @@ -1232,7 +1239,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,  /// addThroughConstraints - Add constraints and links to SpillPlacer from the  /// live-through blocks in Blocks. -void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, +bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,                                       ArrayRef<unsigned> Blocks) {    const unsigned GroupSize = 8;    SpillPlacement::BlockConstraint BCS[GroupSize]; @@ -1256,6 +1263,12 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,      assert(B < GroupSize && "Array overflow");      BCS[B].Number = Number; +    // Abort if the spill cannot be inserted at the MBB' start +    MachineBasicBlock *MBB = MF->getBlockNumbered(Number); +    if (!MBB->empty() && +        SlotIndex::isEarlierInstr(LIS->getInstructionIndex(MBB->instr_front()), +                                  SA->getFirstSplitPoint(Number))) +      return false;      // Interference for the live-in value.      if (Intf.first() <= Indexes->getMBBStartIdx(Number))        BCS[B].Entry = SpillPlacement::MustSpill; @@ -1276,9 +1289,10 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,    SpillPlacer->addConstraints(makeArrayRef(BCS, B));    SpillPlacer->addLinks(makeArrayRef(TBS, T)); +  return true;  } -void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { +bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {    // Keep track of through blocks that have not been added to SpillPlacer.    BitVector Todo = SA->getThroughBlocks();    SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; @@ -1314,9 +1328,10 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {      // Compute through constraints from the interference, or assume that all      // through blocks prefer spilling when forming compact regions.      auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); -    if (Cand.PhysReg) -      addThroughConstraints(Cand.Intf, NewBlocks); -    else +    if (Cand.PhysReg) { +      if (!addThroughConstraints(Cand.Intf, NewBlocks)) +        return false; +    } else        // Provide a strong negative bias on through blocks to prevent unwanted        // liveness on loop backedges.        SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); @@ -1326,6 +1341,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {      SpillPlacer->iterate();    }    LLVM_DEBUG(dbgs() << ", v=" << Visited); +  return true;  }  /// calcCompactRegion - Compute the set of edge bundles that should be live @@ -1356,7 +1372,11 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {      return false;    } -  growRegion(Cand); +  if (!growRegion(Cand)) { +    LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n"); +    return false; +  } +    SpillPlacer->finish();    if (!Cand.LiveBundles.any()) { @@ -1886,7 +1906,10 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,        });        continue;      } -    growRegion(Cand); +    if (!growRegion(Cand)) { +      LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n"); +      continue; +    }      SpillPlacer->finish(); diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h index 8fbe724045e..bcc8f8cf18b 100644 --- a/llvm/lib/CodeGen/SplitKit.h +++ b/llvm/lib/CodeGen/SplitKit.h @@ -25,6 +25,7 @@  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h"  #include "llvm/CodeGen/MachineBasicBlock.h"  #include "llvm/CodeGen/MachineFunction.h"  #include "llvm/CodeGen/SlotIndexes.h" @@ -76,6 +77,18 @@ public:    /// Returns the last insert point as an iterator for \pCurLI in \pMBB.    MachineBasicBlock::iterator getLastInsertPointIter(const LiveInterval &CurLI,                                                       MachineBasicBlock &MBB); + +  /// Return the base index of the first insert point in \pMBB. +  SlotIndex getFirstInsertPoint(MachineBasicBlock &MBB) { +    SlotIndex Res = LIS.getMBBStartIdx(&MBB); +    if (!MBB.empty()) { +      MachineBasicBlock::iterator MII = MBB.SkipPHIsLabelsAndDebug(MBB.begin()); +      if (MII != MBB.end()) +        Res = LIS.getInstructionIndex(*MII); +    } +    return Res; +  } +  };  /// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting @@ -225,6 +238,10 @@ public:    MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) {      return IPA.getLastInsertPointIter(*CurLI, *BB);    } + +  SlotIndex getFirstSplitPoint(unsigned Num) { +    return IPA.getFirstInsertPoint(*MF.getBlockNumbered(Num)); +  }  };  /// SplitEditor - Edit machine code and LiveIntervals for live range diff --git a/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir b/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir new file mode 100644 index 00000000000..0726a2500d6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir @@ -0,0 +1,100 @@ +# REQUIRES: asserts +# RUN: llc -mtriple=amdgcn--- -verify-machineinstrs -debug-only=regalloc -run-pass=greedy -o /dev/null %s 2>&1 | FileCheck %s + +--- +# Check that physreg candidate is not used since cannot be spilled in a block, +# e.g. before exec mask preamble +# CHECK: , cannot spill all interferences. + +name:            foo +tracksRegLiveness: true +registers: +  - { id: 0, class: sreg_64 } +  - { id: 1100, class: sreg_128 } +  - { id: 1101, class: sreg_128 } +  - { id: 1102, class: sreg_128 } +  - { id: 1103, class: sreg_128 } +  - { id: 1104, class: sreg_128 } +  - { id: 1105, class: sreg_128 } +  - { id: 1106, class: sreg_128 } +  - { id: 1107, class: sreg_128 } +  - { id: 1108, class: sreg_128 } +  - { id: 1109, class: sreg_128 } +  - { id: 1110, class: sreg_128 } +  - { id: 1111, class: sreg_128 } +  - { id: 1112, class: sreg_128 } +  - { id: 1113, class: sreg_128 } +  - { id: 1114, class: sreg_128 } +  - { id: 1115, class: sreg_128 } +  - { id: 1116, class: sreg_128 } +  - { id: 1117, class: sreg_128 } +  - { id: 1118, class: sreg_128 } +  - { id: 1119, class: sreg_128 } +  - { id: 1120, class: sreg_128 } +  - { id: 1121, class: sreg_128 } +body:             | +  bb.0: +    successors: %bb.1 +    liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr100_sgpr101, $sgpr102_sgpr103 +    %0:sreg_64 = COPY $sgpr102_sgpr103 +    %1100 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 +    %1101 = COPY %1100 +    %1102 = COPY %1100 +    %1103 = COPY %1100 +    %1104 = COPY %1100 +    %1105 = COPY %1100 +    %1106 = COPY %1100 +    %1107 = COPY %1100 +    %1108 = COPY %1100 +    %1109 = COPY %1100 +    %1110 = COPY %1100 +    %1111 = COPY %1100 +    %1112 = COPY %1100 +    %1113 = COPY %1100 +    %1114 = COPY %1100 +    %1115 = COPY %1100 +    %1116 = COPY %1100 +    %1117 = COPY %1100 +    %1118 = COPY %1100 +    %1119 = COPY %1100 +    %1120 = COPY %1100 +    %1121 = COPY %1100 +    S_BRANCH %bb.1 + +  bb.1: +    liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr102_sgpr103 +    %0 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec +    $exec = S_XOR_B64_term $exec, %0, implicit-def $scc +    SI_MASK_BRANCH %bb.100, implicit $exec +    S_BRANCH %bb.2 + +  bb.2: +    liveins: $sgpr98_sgpr99, $sgpr102_sgpr103 +    %0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec +    $exec = S_XOR_B64_term $exec, %0, implicit-def $scc +    SI_MASK_BRANCH %bb.100, implicit $exec +    S_BRANCH %bb.200 + +  bb.100: +    liveins: $sgpr102_sgpr103 +    %0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr102_sgpr103, implicit-def $exec, implicit-def $scc, implicit $exec +    $exec = S_XOR_B64_term $exec, %0, implicit-def $scc +    S_BRANCH %bb.200 + +  bb.200: +    S_CMP_EQ_U64 %1100.sub0_sub1, %1101.sub2_sub3, implicit-def $scc +    S_CMP_EQ_U64 %1102.sub0_sub1, %1103.sub2_sub3, implicit-def $scc +    S_CMP_EQ_U64 %1104.sub0_sub1, %1105.sub2_sub3, implicit-def $scc +    S_CMP_EQ_U64 %1106.sub0_sub1, %1107.sub2_sub3, implicit-def $scc +    S_CMP_EQ_U64 %1108.sub0_sub1, %1109.sub2_sub3, implicit-def $scc +    S_CMP_EQ_U64 %1110.sub0_sub1, %1111.sub2_sub3, implicit-def $scc +    S_CMP_EQ_U64 %1112.sub0_sub1, %1113.sub2_sub3, implicit-def $scc +    S_CMP_EQ_U64 %1114.sub0_sub1, %1115.sub2_sub3, implicit-def $scc +    S_CMP_EQ_U64 %1116.sub0_sub1, %1117.sub2_sub3, implicit-def $scc +    S_CMP_EQ_U64 %1118.sub0_sub1, %1119.sub2_sub3, implicit-def $scc +    S_CMP_EQ_U64 %1120.sub0_sub1, %1121.sub2_sub3, implicit-def $scc + +    $vgpr0 = V_MOV_B32_e32 0, implicit $exec +    S_SETPC_B64_return %0, implicit $vgpr0 + +...  | 

