author    | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-21 19:12:08 +0000
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-02-21 19:12:08 +0000
commit    | e0bf7d02f037a5ba015dd468b483c17350b7d7b4 (patch)
tree      | c6718969db45ad3f71106a4a55dceea77e527ce5 /llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
parent    | ebfe01c121e304f3e705cfd40536a8ff02ed0547 (diff)
AMDGPU: Don't use stack space for SGPR->VGPR spills
Before frame offsets are calculated, try to eliminate the
frame indexes used by SGPR spills, so that they can be
deleted afterwards.

I think it is safe to assume for now that no other instruction
will reuse the same frame indexes. If that assumption ever
breaks it should be easy to notice, since anything that later
touches a dead frame index asserts.

The unused emergency stack slot still appears to be left
behind, so an extra 4 bytes is still wasted.
llvm-svn: 295753
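For context on the allocation scheme in the diff below: every 4-byte slice of a spilled SGPR gets one lane of a spill VGPR, and once all wavefront-size lanes of the current VGPR are taken, allocation rolls over to a freshly reserved VGPR, so a wide spill can straddle two VGPRs. The following is a minimal standalone model of that bookkeeping, not the LLVM code itself; the names (`SpillSlot`, `allocateSpill`, `kWaveSize`) and the fixed wave size of 64 are assumptions made for illustration.

```cpp
#include <cassert>
#include <cstdio>
#include <map>
#include <vector>

// Illustrative model only: one "lane" is a 4-byte slice of a spill VGPR.
struct SpillSlot {
  unsigned VGPR; // index of the spill VGPR (0, 1, 2, ...), not a real register
  unsigned Lane; // lane within that VGPR, in [0, kWaveSize)
};

constexpr unsigned kWaveSize = 64; // wavefront size assumed for the lane math

static unsigned NumVGPRSpillLanes = 0;               // running lane counter
static std::map<int, std::vector<SpillSlot>> Spills; // frame index -> lanes

// Assign Size/4 lanes to frame index FI, rolling over to a new VGPR whenever
// the current one is full (this mirrors the wide-spill handling in the diff).
void allocateSpill(int FI, unsigned Size) {
  assert(Size >= 4 && Size <= 64 && Size % 4 == 0 && "invalid sgpr spill size");
  for (unsigned I = 0; I != Size / 4; ++I, ++NumVGPRSpillLanes) {
    SpillSlot S;
    S.VGPR = NumVGPRSpillLanes / kWaveSize;
    S.Lane = NumVGPRSpillLanes % kWaveSize;
    Spills[FI].push_back(S);
  }
}

int main() {
  NumVGPRSpillLanes = 62;      // pretend 62 lanes are already in use
  allocateSpill(/*FI=*/0, 16); // a 4-dword spill now straddles VGPR 0 and 1
  for (const SpillSlot &S : Spills[0])
    std::printf("VGPR %u, lane %u\n", S.VGPR, S.Lane);
  return 0;
}
```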
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp')

-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 93

1 file changed, 51 insertions, 42 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index ecd46b95ca6..efae3382f49 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -20,12 +20,6 @@
 
 using namespace llvm;
 
-static cl::opt<bool> EnableSpillSGPRToVGPR(
-  "amdgpu-spill-sgpr-to-vgpr",
-  cl::desc("Enable spilling VGPRs to SGPRs"),
-  cl::ReallyHidden,
-  cl::init(true));
-
 SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
   : AMDGPUMachineFunction(MF),
     TIDReg(AMDGPU::NoRegister),
@@ -193,45 +187,60 @@ unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
   return PrivateMemoryPtrUserSGPR;
 }
 
-SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
-  MachineFunction *MF,
-  unsigned FrameIndex,
-  unsigned SubIdx) {
-  if (!EnableSpillSGPRToVGPR)
-    return SpilledReg();
-
-  const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
-  const SIRegisterInfo *TRI = ST.getRegisterInfo();
-
-  MachineFrameInfo &FrameInfo = MF->getFrameInfo();
-  MachineRegisterInfo &MRI = MF->getRegInfo();
-  int64_t Offset = FrameInfo.getObjectOffset(FrameIndex);
-  Offset += SubIdx * 4;
-
-  unsigned LaneVGPRIdx = Offset / (64 * 4);
-  unsigned Lane = (Offset / 4) % 64;
-
-  struct SpilledReg Spill;
-  Spill.Lane = Lane;
-
-  if (!LaneVGPRs.count(LaneVGPRIdx)) {
-    unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
-                                                *MF);
+/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
+bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
+                                                    int FI) {
+  std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
 
-    if (LaneVGPR == AMDGPU::NoRegister)
-      // We have no VGPRs left for spilling SGPRs.
-      return Spill;
+  // This has already been allocated.
+  if (!SpillLanes.empty())
+    return true;
 
-    LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
-
-    // Add this register as live-in to all blocks to avoid machine verifer
-    // complaining about use of an undefined physical register.
-    for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
-         BI != BE; ++BI) {
-      BI->addLiveIn(LaneVGPR);
+  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  unsigned WaveSize = ST.getWavefrontSize();
+
+  unsigned Size = FrameInfo.getObjectSize(FI);
+  assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
+  assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
+
+  int NumLanes = Size / 4;
+
+  // Make sure to handle the case where a wide SGPR spill may span between two
+  // VGPRs.
+  for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
+    unsigned LaneVGPR;
+    unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
+
+    if (VGPRIndex == 0) {
+      LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+      if (LaneVGPR == AMDGPU::NoRegister) {
+        // We have no VGPRs left for spilling SGPRs. Reset because we won't
+        // partially spill the SGPR to VGPRs.
+        SGPRToVGPRSpills.erase(FI);
+        NumVGPRSpillLanes -= I;
+        return false;
+      }
+
+      SpillVGPRs.push_back(LaneVGPR);
+
+      // Add this register as live-in to all blocks to avoid machine verifer
+      // complaining about use of an undefined physical register.
+      for (MachineBasicBlock &BB : MF)
+        BB.addLiveIn(LaneVGPR);
+    } else {
+      LaneVGPR = SpillVGPRs.back();
     }
+
+    SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
   }
-  Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
-  return Spill;
+  return true;
+}
+
+void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
+  for (auto &R : SGPRToVGPRSpills)
+    MFI.RemoveStackObject(R.first);
 }
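The two new hooks are meant to be used as a pair by the target's frame lowering: reserve VGPR lanes for each SGPR-spill frame index before stack offsets are assigned, rewrite the spill and restore instructions to use those lanes, and only then drop the now-dead stack objects. Below is a hedged sketch of that sequence, assuming it runs from a hook that executes before frame offsets are finalized; `collectSGPRSpillIndexes` is a hypothetical placeholder, not an API from this commit.

```cpp
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include <vector>

using namespace llvm;

// Hypothetical placeholder: in a real pass this would collect the frame
// indexes that were created for SGPR spills.
static std::vector<int> collectSGPRSpillIndexes(MachineFunction &) {
  return {};
}

// Hypothetical driver, runs before frame offsets are finalized.
static void lowerSGPRSpillsBeforeFrameFinalized(MachineFunction &MF) {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  for (int FI : collectSGPRSpillIndexes(MF)) {
    // Try to reserve VGPR lanes for this spill; on failure the entry is
    // dropped again and the spill simply keeps living on the stack.
    FuncInfo->allocateSGPRSpillToVGPR(MF, FI);
  }

  // ... the spill/restore instructions for the successfully mapped frame
  // indexes would be rewritten here to use the reserved VGPR lanes ...

  // Finally drop the stack objects that were mapped to VGPR lanes; anything
  // that still touches a removed (dead) frame index will assert.
  FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
}
```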