| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-21 19:12:08 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-21 19:12:08 +0000 |
| commit | e0bf7d02f037a5ba015dd468b483c17350b7d7b4 | |
| tree | c6718969db45ad3f71106a4a55dceea77e527ce5 | |
| parent | ebfe01c121e304f3e705cfd40536a8ff02ed0547 | |
AMDGPU: Don't use stack space for SGPR->VGPR spills
Before frame offsets are calculated, try to eliminate the
frame indexes used by SGPR spills. The now-dead frame indexes
can then be deleted.

I think for now we can be sure that no other instruction
will be re-using the same frame indexes. It should be easy
to notice if this assumption ever breaks, since anything that
tries to use a dead frame index later will hit an assert.

The unused emergency stack slot still seems to be left behind,
so an additional 4 bytes of stack is still wasted.
llvm-svn: 295753
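The heart of the change is the lane bookkeeping in the new `allocateSGPRSpillToVGPR` (see the SIMachineFunctionInfo.cpp hunk below): each 32-bit subregister of a spilled SGPR occupies one lane of a VGPR, and a fresh VGPR is claimed from `findUnusedRegister` only when the running lane count crosses a wavefront-size boundary, so a wide spill may straddle two VGPRs. A minimal standalone sketch of that arithmetic (not LLVM code; `LaneAllocator` and its members are invented for illustration):

```cpp
#include <cstdio>
#include <utility>
#include <vector>

// Toy stand-in for the spill-lane state kept in SIMachineFunctionInfo.
struct LaneAllocator {
  unsigned WaveSize = 64;      // lanes per VGPR (wavefront size on SI)
  unsigned NumSpillLanes = 0;  // running lane count across all SGPR spills
  unsigned NumVGPRs = 0;       // spill VGPRs claimed so far

  // Allocate lanes for one SGPR spill of SizeInBytes (4..64 bytes),
  // returning a (VGPR index, lane) pair per 32-bit subregister.
  std::vector<std::pair<unsigned, unsigned>> allocate(unsigned SizeInBytes) {
    std::vector<std::pair<unsigned, unsigned>> Lanes;
    for (unsigned I = 0; I != SizeInBytes / 4; ++I, ++NumSpillLanes) {
      unsigned Lane = NumSpillLanes % WaveSize;
      if (Lane == 0)  // First lane of a fresh VGPR; the real code calls
        ++NumVGPRs;   // findUnusedRegister() here, which may fail.
      Lanes.push_back({NumVGPRs - 1, Lane});
    }
    return Lanes;
  }
};

int main() {
  LaneAllocator A;
  // Pretend 62 lanes of the first spill VGPR are already occupied.
  A.NumSpillLanes = 62;
  A.NumVGPRs = 1;
  // A 64-byte (SReg_512) spill then straddles two VGPRs:
  for (auto [Vgpr, Lane] : A.allocate(64))
    std::printf("vgpr %u, lane %u\n", Vgpr, Lane);
  return 0;
}
```

When no VGPR is left, the real allocator also rolls `NumVGPRSpillLanes` back and erases the partial map entry, so an SGPR is never left half-spilled to VGPR lanes and half to scratch.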
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 50 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 93 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 26 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 111 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 29 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 2 |
8 files changed, 225 insertions, 90 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 3cb9ba32628..03de4bf508f 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -128,13 +128,12 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
   ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
   AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
 
-  // Skip the last 2 elements because the last one is reserved for VCC, and
-  // this is the 2nd to last element already.
+  // Skip the last N reserved elements because they should have already been
+  // reserved for VCC etc.
   for (MCPhysReg Reg : AllSGPR128s) {
     // Pick the first unallocated one. Make sure we don't clobber the other
     // reserved input we needed.
     if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
-      //assert(MRI.isAllocatable(Reg));
       MRI.replaceRegWith(ScratchRsrcReg, Reg);
       MFI->setScratchRSrcReg(Reg);
       return Reg;
@@ -157,7 +156,6 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
   unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
 
   MachineRegisterInfo &MRI = MF.getRegInfo();
-
   unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
 
   ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
@@ -393,17 +391,45 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
   if (!MFI.hasStackObjects())
     return;
 
-  bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();
+  assert(RS && "RegScavenger required if spilling");
+  int ScavengeFI = MFI.CreateStackObject(
+    AMDGPU::SGPR_32RegClass.getSize(),
+    AMDGPU::SGPR_32RegClass.getAlignment(), false);
+  RS->addScavengingFrameIndex(ScavengeFI);
+
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  if (!TRI.spillSGPRToVGPR())
+    return;
 
-  assert((RS || !MayNeedScavengingEmergencySlot) &&
-         "RegScavenger required if spilling");
+  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+  if (!FuncInfo->hasSpilledSGPRs())
+    return;
 
-  if (MayNeedScavengingEmergencySlot) {
-    int ScavengeFI = MFI.CreateStackObject(
-      AMDGPU::SGPR_32RegClass.getSize(),
-      AMDGPU::SGPR_32RegClass.getAlignment(), false);
-    RS->addScavengingFrameIndex(ScavengeFI);
+  // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
+  // are spilled to VGPRs, in which case we can eliminate the stack usage.
+  //
+  // XXX - This operates under the assumption that only other SGPR spills are
+  // users of the frame index. I'm not 100% sure this is correct. The
+  // StackColoring pass has a comment saying a future improvement would be to
+  // merging of allocas with spill slots, but for now according to
+  // MachineFrameInfo isSpillSlot can't alias any other object.
+  for (MachineBasicBlock &MBB : MF) {
+    MachineBasicBlock::iterator Next;
+    for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
+      MachineInstr &MI = *I;
+      Next = std::next(I);
+
+      if (TII->isSGPRSpill(MI)) {
+        int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+        if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI))
+          TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
+      }
+    }
   }
+
+  FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
 }
 
 void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 051f2153ccb..7005c6a85c0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4673,6 +4673,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
       return std::make_pair(0U, &AMDGPU::SReg_128RegClass);
     case 256:
       return std::make_pair(0U, &AMDGPU::SReg_256RegClass);
+    case 512:
+      return std::make_pair(0U, &AMDGPU::SReg_512RegClass);
     }
 
   case 'v':
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 51006589b5c..085ef067f2d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -36,7 +36,7 @@ BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
                  cl::desc("Restrict range of branch instructions (DEBUG)"));
 
 SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
-    : AMDGPUInstrInfo(ST), RI(), ST(ST) {}
+    : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {}
 
 //===----------------------------------------------------------------------===//
 // TargetInstrInfo callbacks
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index ecd46b95ca6..efae3382f49 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -20,12 +20,6 @@
 
 using namespace llvm;
 
-static cl::opt<bool> EnableSpillSGPRToVGPR(
-  "amdgpu-spill-sgpr-to-vgpr",
-  cl::desc("Enable spilling VGPRs to SGPRs"),
-  cl::ReallyHidden,
-  cl::init(true));
-
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   : AMDGPUMachineFunction(MF),
     TIDReg(AMDGPU::NoRegister),
@@ -193,45 +187,60 @@ unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
   return PrivateMemoryPtrUserSGPR;
 }
 
-SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
-  MachineFunction *MF,
-  unsigned FrameIndex,
-  unsigned SubIdx) {
-  if (!EnableSpillSGPRToVGPR)
-    return SpilledReg();
-
-  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
-  const SIRegisterInfo *TRI = ST.getRegisterInfo();
-
-  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
-  MachineRegisterInfo &MRI = MF->getRegInfo();
-  int64_t Offset = FrameInfo.getObjectOffset(FrameIndex);
-  Offset += SubIdx * 4;
-
-  unsigned LaneVGPRIdx = Offset / (64 * 4);
-  unsigned Lane = (Offset / 4) % 64;
-
-  struct SpilledReg Spill;
-  Spill.Lane = Lane;
-
-  if (!LaneVGPRs.count(LaneVGPRIdx)) {
-    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
-                                                *MF);
+/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
+bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
+                                                    int FI) {
+  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
 
-    if (LaneVGPR == AMDGPU::NoRegister)
-      // We have no VGPRs left for spilling SGPRs.
-      return Spill;
+  // This has already been allocated.
+  if (!SpillLanes.empty())
+    return true;
 
-    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
-
-    // Add this register as live-in to all blocks to avoid machine verifer
-    // complaining about use of an undefined physical register.
-    for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
-         BI != BE; ++BI) {
-      BI->addLiveIn(LaneVGPR);
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  unsigned WaveSize = ST.getWavefrontSize();
+
+  unsigned Size = FrameInfo.getObjectSize(FI);
+  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
+  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
+
+  int NumLanes = Size / 4;
+
+  // Make sure to handle the case where a wide SGPR spill may span between two
+  // VGPRs.
+  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
+    unsigned LaneVGPR;
+    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
+
+    if (VGPRIndex == 0) {
+      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+      if (LaneVGPR == AMDGPU::NoRegister) {
+        // We have no VGPRs left for spilling SGPRs. Reset because we won't
+        // partially spill the SGPR to VGPRs.
+        SGPRToVGPRSpills.erase(FI);
+        NumVGPRSpillLanes -= I;
+        return false;
+      }
+
+      SpillVGPRs.push_back(LaneVGPR);
+
+      // Add this register as live-in to all blocks to avoid machine verifer
+      // complaining about use of an undefined physical register.
+      for (MachineBasicBlock &BB : MF)
+        BB.addLiveIn(LaneVGPR);
+    } else {
+      LaneVGPR = SpillVGPRs.back();
     }
+
+    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
   }
 
-  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
-  return Spill;
+  return true;
+}
+
+void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
+  for (auto &R : SGPRToVGPRSpills)
+    MFI.RemoveStackObject(R.first);
 }
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index dc1f22ae60d..ec1d2c37115 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -134,7 +134,8 @@ public:
   // FIXME: Make private
   unsigned LDSWaveSpillSize;
   unsigned PSInputEna;
-  std::map<unsigned, unsigned> LaneVGPRs;
+
+  unsigned ScratchOffsetReg;
   unsigned NumUserSGPRs;
   unsigned NumSystemSGPRs;
@@ -195,12 +196,29 @@ public:
     bool hasReg() { return VGPR != AMDGPU::NoRegister;}
   };
 
-  // SIMachineFunctionInfo definition
+private:
+  // SGPR->VGPR spilling support.
+  typedef std::pair<unsigned, unsigned> SpillRegMask;
+
+  // Track VGPR + wave index for each subregister of the SGPR spilled to
+  // frameindex key.
+  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
+  unsigned NumVGPRSpillLanes = 0;
+  SmallVector<unsigned, 2> SpillVGPRs;
+
+public:
 
   SIMachineFunctionInfo(const MachineFunction &MF);
-  SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
-                           unsigned SubIdx);
+
+  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
+    auto I = SGPRToVGPRSpills.find(FrameIndex);
+    return (I == SGPRToVGPRSpills.end()) ?
+      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
+  }
+
+  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
+  void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
+
   bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
   unsigned getTIDReg() const { return TIDReg; };
   void setTIDReg(unsigned Reg) { TIDReg = Reg; }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index a90fc28ced3..39324cbbcc0 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -24,12 +24,6 @@
 
 using namespace llvm;
 
-static cl::opt<bool> EnableSpillSGPRToSMEM(
-  "amdgpu-spill-sgpr-to-smem",
-  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
-  cl::init(false));
-
-
 static bool hasPressureSet(const int *PSets, unsigned PSetID) {
   for (unsigned i = 0; PSets[i] != -1; ++i) {
     if (PSets[i] == (int)PSetID)
@@ -49,9 +43,28 @@ void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
   }
 }
 
-SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo(),
-                                   SGPRPressureSets(getNumRegPressureSets()),
-                                   VGPRPressureSets(getNumRegPressureSets()) {
+static cl::opt<bool> EnableSpillSGPRToSMEM(
+  "amdgpu-spill-sgpr-to-smem",
+  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
+  cl::init(false));
+
+static cl::opt<bool> EnableSpillSGPRToVGPR(
+  "amdgpu-spill-sgpr-to-vgpr",
+  cl::desc("Enable spilling VGPRs to SGPRs"),
+  cl::ReallyHidden,
+  cl::init(true));
+
+SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
+  AMDGPURegisterInfo(),
+  SGPRPressureSets(getNumRegPressureSets()),
+  VGPRPressureSets(getNumRegPressureSets()),
+  SpillSGPRToVGPR(false),
+  SpillSGPRToSMEM(false) {
+  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
+    SpillSGPRToSMEM = true;
+  else if (EnableSpillSGPRToVGPR)
+    SpillSGPRToVGPR = true;
+
   unsigned NumRegPressureSets = getNumRegPressureSets();
 
   SGPRSetID = NumRegPressureSets;
@@ -557,11 +570,20 @@ static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
                        AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
 }
 
-void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
+bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                                int Index,
-                               RegScavenger *RS) const {
+                               RegScavenger *RS,
+                               bool OnlyToVGPR) const {
   MachineBasicBlock *MBB = MI->getParent();
   MachineFunction *MF = MBB->getParent();
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+
+  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
+    = MFI->getSGPRToVGPRSpills(Index);
+  bool SpillToVGPR = !VGPRSpills.empty();
+  if (OnlyToVGPR && !SpillToVGPR)
+    return false;
+
   MachineRegisterInfo &MRI = MF->getRegInfo();
   const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
@@ -570,10 +592,11 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
   bool IsKill = MI->getOperand(0).isKill();
   const DebugLoc &DL = MI->getDebugLoc();
 
-  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
 
-  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
+  bool SpillToSMEM = spillSGPRToSMEM();
+  if (SpillToSMEM && OnlyToVGPR)
+    return false;
 
   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
@@ -646,9 +669,9 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
       continue;
     }
 
-    struct SIMachineFunctionInfo::SpilledReg Spill =
-      MFI->getSpilledReg(MF, Index, i);
-    if (Spill.hasReg()) {
+    if (SpillToVGPR) {
+      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
+
       BuildMI(*MBB, MI, DL,
               TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
               Spill.VGPR)
@@ -659,6 +682,10 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
       // frame index, we should delete the frame index when all references to
       // it are fixed.
     } else {
+      // XXX - Can to VGPR spill fail for some subregisters but not others?
+      if (OnlyToVGPR)
+        return false;
+
       // Spill SGPR to a frame index.
       // TODO: Should VI try to spill to VGPR and then spill to SMEM?
       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -702,22 +729,33 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
 
   MI->eraseFromParent();
   MFI->addToSpilledSGPRs(NumSubRegs);
+  return true;
 }
 
-void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
+bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
                                  int Index,
-                                 RegScavenger *RS) const {
+                                 RegScavenger *RS,
+                                 bool OnlyToVGPR) const {
   MachineFunction *MF = MI->getParent()->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   MachineBasicBlock *MBB = MI->getParent();
   SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+
+  ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
+    = MFI->getSGPRToVGPRSpills(Index);
+  bool SpillToVGPR = !VGPRSpills.empty();
+  if (OnlyToVGPR && !SpillToVGPR)
+    return false;
+
   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
   const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const DebugLoc &DL = MI->getDebugLoc();
 
   unsigned SuperReg = MI->getOperand(0).getReg();
-  bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
+  bool SpillToSMEM = spillSGPRToSMEM();
+  if (SpillToSMEM && OnlyToVGPR)
+    return false;
 
   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
@@ -785,10 +823,8 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
       continue;
     }
 
-    SIMachineFunctionInfo::SpilledReg Spill
-      = MFI->getSpilledReg(MF, Index, i);
-
-    if (Spill.hasReg()) {
+    if (SpillToVGPR) {
+      SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
       auto MIB =
         BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
                 SubReg)
@@ -798,6 +834,9 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
       if (NumSubRegs > 1)
         MIB.addReg(SuperReg, RegState::ImplicitDefine);
     } else {
+      if (OnlyToVGPR)
+        return false;
+
       // Restore SGPR from a stack slot.
       // FIXME: We should use S_LOAD_DWORD here for VI.
       unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -832,6 +871,32 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
   }
 
   MI->eraseFromParent();
+  return true;
+}
+
+/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
+/// a VGPR and the stack slot can be safely eliminated when all other users are
+/// handled.
+bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
+  MachineBasicBlock::iterator MI,
+  int FI,
+  RegScavenger *RS) const {
+  switch (MI->getOpcode()) {
+  case AMDGPU::SI_SPILL_S512_SAVE:
+  case AMDGPU::SI_SPILL_S256_SAVE:
+  case AMDGPU::SI_SPILL_S128_SAVE:
+  case AMDGPU::SI_SPILL_S64_SAVE:
+  case AMDGPU::SI_SPILL_S32_SAVE:
+    return spillSGPR(MI, FI, RS, true);
+  case AMDGPU::SI_SPILL_S512_RESTORE:
+  case AMDGPU::SI_SPILL_S256_RESTORE:
+  case AMDGPU::SI_SPILL_S128_RESTORE:
+  case AMDGPU::SI_SPILL_S64_RESTORE:
+  case AMDGPU::SI_SPILL_S32_RESTORE:
+    return restoreSGPR(MI, FI, RS, true);
+  default:
+    llvm_unreachable("not an SGPR spill instruction");
+  }
 }
 
 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index c95492ffd22..679ed229758 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -21,8 +21,8 @@
 
 namespace llvm {
 
-class SISubtarget;
 class MachineRegisterInfo;
+class SISubtarget;
 class SIMachineFunctionInfo;
 
 class SIRegisterInfo final : public AMDGPURegisterInfo {
@@ -31,13 +31,22 @@ private:
   unsigned VGPRSetID;
   BitVector SGPRPressureSets;
   BitVector VGPRPressureSets;
+  bool SpillSGPRToVGPR;
+  bool SpillSGPRToSMEM;
 
   void reserveRegisterTuples(BitVector &, unsigned Reg) const;
   void classifyPressureSet(unsigned PSetID, unsigned Reg,
                            BitVector &PressureSets) const;
-
 public:
-  SIRegisterInfo();
+  SIRegisterInfo(const SISubtarget &ST);
+
+  bool spillSGPRToVGPR() const {
+    return SpillSGPRToVGPR;
+  }
+
+  bool spillSGPRToSMEM() const {
+    return SpillSGPRToSMEM;
+  }
 
   /// Return the end register initially reserved for the scratch buffer in case
   /// spilling is needed.
@@ -78,16 +87,22 @@ public:
   const TargetRegisterClass *getPointerRegClass(
     const MachineFunction &MF, unsigned Kind = 0) const override;
 
-  void spillSGPR(MachineBasicBlock::iterator MI,
-                 int FI, RegScavenger *RS) const;
+  /// If \p OnlyToVGPR is true, this will only succeed if this
+  bool spillSGPR(MachineBasicBlock::iterator MI,
+                 int FI, RegScavenger *RS,
+                 bool OnlyToVGPR = false) const;
 
-  void restoreSGPR(MachineBasicBlock::iterator MI,
-                   int FI, RegScavenger *RS) const;
+  bool restoreSGPR(MachineBasicBlock::iterator MI,
+                   int FI, RegScavenger *RS,
+                   bool OnlyToVGPR = false) const;
 
   void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS) const override;
 
+  bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,
+                                          int FI, RegScavenger *RS) const;
+
   unsigned getHWRegIndex(unsigned Reg) const {
     return getEncodingValue(Reg) & 0xff;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index fdf82a6d818..9fdd5afc3c4 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -325,7 +325,7 @@ def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
   let AllocationPriority = 11;
 }
 
-def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
+def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512)> {
   // Requires 8 s_mov_b64 to copy
   let CopyCost = 8;
   let AllocationPriority = 12;
```
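For readers who have not seen the mechanism, the `V_WRITELANE_B32`/`V_READLANE_B32` instructions emitted on the spill and restore paths above are what let an SGPR value live in a single lane of a VGPR rather than in scratch memory. A rough functional model of the lane semantics (plain C++, purely illustrative; the real instructions also interact with EXEC and require a wave-uniform scalar source):

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

constexpr unsigned WaveSize = 64; // SI wavefront width

// A VGPR holds one 32-bit value per lane; an SGPR holds a single
// 32-bit value shared by the whole wave.
struct VGPR { uint32_t Lane[WaveSize]; };

// v_writelane_b32: write a scalar into one lane, leaving the rest intact.
void writelane(VGPR &V, unsigned Lane, uint32_t SVal) {
  assert(Lane < WaveSize && "lane out of range");
  V.Lane[Lane] = SVal;
}

// v_readlane_b32: read one lane of a VGPR back into a scalar.
uint32_t readlane(const VGPR &V, unsigned Lane) {
  assert(Lane < WaveSize && "lane out of range");
  return V.Lane[Lane];
}

int main() {
  VGPR SpillVGPR = {};
  uint32_t S0 = 0xdeadbeef;                   // value of the SGPR being spilled
  writelane(SpillVGPR, 5, S0);                // spill: SGPR -> VGPR lane 5
  uint32_t Restored = readlane(SpillVGPR, 5); // restore it later
  std::printf("restored 0x%x\n", (unsigned)Restored);
  return 0;
}
```

Since one VGPR offers 64 such lanes, a single reserved VGPR can hold up to 64 spilled SGPR dwords with no scratch-memory traffic, which is why eliminating these frame indexes before frame finalization removes the stack usage entirely.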

