author    Matt Arsenault <Matthew.Arsenault@amd.com>  2017-02-21 19:12:08 +0000
committer Matt Arsenault <Matthew.Arsenault@amd.com>  2017-02-21 19:12:08 +0000
commit    e0bf7d02f037a5ba015dd468b483c17350b7d7b4 (patch)
tree      c6718969db45ad3f71106a4a55dceea77e527ce5 /llvm/lib/Target
parent    ebfe01c121e304f3e705cfd40536a8ff02ed0547 (diff)
AMDGPU: Don't use stack space for SGPR->VGPR spills
Before frame offsets are calculated, try to eliminate the frame indexes used by SGPR spills. Then we can delete them after.

I think for now we can be sure that no other instruction will be re-using the same frame indexes. It should be easy to notice if this assumption ever breaks since everything asserts if it tries to use a dead frame index later.

The unused emergency stack slot seems to still be left behind, so an additional 4 bytes is still wasted.

llvm-svn: 295753
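In outline, the patch makes processFunctionBeforeFrameFinalized reserve VGPR lanes for every SGPR-spill frame index, rewrite those spills in place, and delete the now-dead stack objects before stack offsets are assigned. A standalone sketch of why that ordering saves scratch space (plain C++ model, not the LLVM API; the sizes are invented, and the leftover 4-byte object stands in for the unused emergency slot):

// Plain C++ model (not the LLVM API) of why eliminating SGPR-spill frame
// indexes before frame finalization shrinks the scratch size: offsets are
// only assigned to objects that still exist at that point.
#include <cstdio>
#include <vector>

struct FrameObject { unsigned Size; bool IsSGPRSpill; };

unsigned assignOffsets(const std::vector<FrameObject> &Objects,
                       bool EliminateSGPRSpills) {
  unsigned ScratchSize = 0;
  for (const FrameObject &O : Objects) {
    if (EliminateSGPRSpills && O.IsSGPRSpill)
      continue;            // rewritten to VGPR lanes; stack object deleted
    ScratchSize += O.Size; // alignment ignored for brevity
  }
  return ScratchSize;
}

int main() {
  std::vector<FrameObject> Objects = {
      {16, /*IsSGPRSpill=*/true},  // e.g. an SGPR_128 spill slot
      {4,  /*IsSGPRSpill=*/true},  // e.g. an SGPR_32 spill slot
      {4,  /*IsSGPRSpill=*/false}, // leftover emergency scavenging slot
  };
  std::printf("scratch before: %u bytes\n", assignOffsets(Objects, false)); // 24
  std::printf("scratch after:  %u bytes\n", assignOffsets(Objects, true));  // 4
}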
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp        |  50
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp         |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp            |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp  |  93
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h    |  26
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp         | 111
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.h           |  29
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.td          |   2
8 files changed, 225 insertions, 90 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 3cb9ba32628..03de4bf508f 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -128,13 +128,12 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(ST, MF);
AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
- // Skip the last 2 elements because the last one is reserved for VCC, and
- // this is the 2nd to last element already.
+ // Skip the last N reserved elements because they should have already been
+ // reserved for VCC etc.
for (MCPhysReg Reg : AllSGPR128s) {
// Pick the first unallocated one. Make sure we don't clobber the other
// reserved input we needed.
if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
- //assert(MRI.isAllocatable(Reg));
MRI.replaceRegWith(ScratchRsrcReg, Reg);
MFI->setScratchRSrcReg(Reg);
return Reg;
@@ -157,7 +156,6 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
MachineRegisterInfo &MRI = MF.getRegInfo();
-
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
@@ -393,17 +391,45 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
if (!MFI.hasStackObjects())
return;
- bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();
+ assert(RS && "RegScavenger required if spilling");
+ int ScavengeFI = MFI.CreateStackObject(
+ AMDGPU::SGPR_32RegClass.getSize(),
+ AMDGPU::SGPR_32RegClass.getAlignment(), false);
+ RS->addScavengingFrameIndex(ScavengeFI);
+
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ if (!TRI.spillSGPRToVGPR())
+ return;
- assert((RS || !MayNeedScavengingEmergencySlot) &&
- "RegScavenger required if spilling");
+ SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ if (!FuncInfo->hasSpilledSGPRs())
+ return;
- if (MayNeedScavengingEmergencySlot) {
- int ScavengeFI = MFI.CreateStackObject(
- AMDGPU::SGPR_32RegClass.getSize(),
- AMDGPU::SGPR_32RegClass.getAlignment(), false);
- RS->addScavengingFrameIndex(ScavengeFI);
+ // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
+ // are spilled to VGPRs, in which case we can eliminate the stack usage.
+ //
+ // XXX - This operates under the assumption that only other SGPR spills are
+ // users of the frame index. I'm not 100% sure this is correct. The
+ // StackColoring pass has a comment saying a future improvement would be to
+ // merge allocas with spill slots, but for now, according to
+ // MachineFrameInfo, isSpillSlot can't alias any other object.
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::iterator Next;
+ for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
+ MachineInstr &MI = *I;
+ Next = std::next(I);
+
+ if (TII->isSGPRSpill(MI)) {
+ int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
+ if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI))
+ TRI.eliminateSGPRToVGPRSpillFrameIndex(MI, FI, RS);
+ }
+ }
}
+
+ FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
}
void SIFrameLowering::emitDebuggerPrologue(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 051f2153ccb..7005c6a85c0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4673,6 +4673,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &AMDGPU::SReg_128RegClass);
case 256:
return std::make_pair(0U, &AMDGPU::SReg_256RegClass);
+ case 512:
+ return std::make_pair(0U, &AMDGPU::SReg_512RegClass);
}
case 'v':
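With the new case 512, a 512-bit value handed to an 's' inline-asm constraint is matched to SReg_512 instead of failing constraint selection. A hedged user-side illustration (the vector typedef and function name are mine, and this assumes a clang built with the amdgcn target):

// Illustrative only: ties a 16 x i32 value to a 512-bit SGPR tuple via the
// "s" constraint, which this change makes resolvable to SReg_512.
typedef int v16i32 __attribute__((ext_vector_type(16)));

void keep_in_sgprs(v16i32 v) {
  // An empty asm template is enough to force constraint selection.
  __asm__ volatile("" :: "s"(v));
}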
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 51006589b5c..085ef067f2d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -36,7 +36,7 @@ BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
cl::desc("Restrict range of branch instructions (DEBUG)"));
SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
- : AMDGPUInstrInfo(ST), RI(), ST(ST) {}
+ : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index ecd46b95ca6..efae3382f49 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -20,12 +20,6 @@
using namespace llvm;
-static cl::opt<bool> EnableSpillSGPRToVGPR(
- "amdgpu-spill-sgpr-to-vgpr",
- cl::desc("Enable spilling VGPRs to SGPRs"),
- cl::ReallyHidden,
- cl::init(true));
-
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),
@@ -193,45 +187,60 @@ unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
return PrivateMemoryPtrUserSGPR;
}
-SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
- MachineFunction *MF,
- unsigned FrameIndex,
- unsigned SubIdx) {
- if (!EnableSpillSGPRToVGPR)
- return SpilledReg();
-
- const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
-
- MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- int64_t Offset = FrameInfo.getObjectOffset(FrameIndex);
- Offset += SubIdx * 4;
-
- unsigned LaneVGPRIdx = Offset / (64 * 4);
- unsigned Lane = (Offset / 4) % 64;
-
- struct SpilledReg Spill;
- Spill.Lane = Lane;
-
- if (!LaneVGPRs.count(LaneVGPRIdx)) {
- unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass,
- *MF);
+/// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI.
+bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
+ int FI) {
+ std::vector<SpilledReg> &SpillLanes = SGPRToVGPRSpills[FI];
- if (LaneVGPR == AMDGPU::NoRegister)
- // We have no VGPRs left for spilling SGPRs.
- return Spill;
+ // This has already been allocated.
+ if (!SpillLanes.empty())
+ return true;
- LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
-
- // Add this register as live-in to all blocks to avoid machine verifer
- // complaining about use of an undefined physical register.
- for (MachineFunction::iterator BI = MF->begin(), BE = MF->end();
- BI != BE; ++BI) {
- BI->addLiveIn(LaneVGPR);
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned WaveSize = ST.getWavefrontSize();
+
+ unsigned Size = FrameInfo.getObjectSize(FI);
+ assert(Size >= 4 && Size <= 64 && "invalid sgpr spill size");
+ assert(TRI->spillSGPRToVGPR() && "not spilling SGPRs to VGPRs");
+
+ int NumLanes = Size / 4;
+
+ // Make sure to handle the case where a wide SGPR spill may span between two
+ // VGPRs.
+ for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
+ unsigned LaneVGPR;
+ unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
+
+ if (VGPRIndex == 0) {
+ LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
+ if (LaneVGPR == AMDGPU::NoRegister) {
+ // We have no VGPRs left for spilling SGPRs. Reset because we won't
+ // partially spill the SGPR to VGPRs.
+ SGPRToVGPRSpills.erase(FI);
+ NumVGPRSpillLanes -= I;
+ return false;
+ }
+
+ SpillVGPRs.push_back(LaneVGPR);
+
+ // Add this register as live-in to all blocks to avoid machine verifier
+ // complaining about use of an undefined physical register.
+ for (MachineBasicBlock &BB : MF)
+ BB.addLiveIn(LaneVGPR);
+ } else {
+ LaneVGPR = SpillVGPRs.back();
}
+
+ SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex));
}
- Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
- return Spill;
+ return true;
+}
+
+void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
+ for (auto &R : SGPRToVGPRSpills)
+ MFI.RemoveStackObject(R.first);
}
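The lane accounting in allocateSGPRSpillToVGPR is what lets a wide spill straddle VGPRs: a running lane counter modulo the wavefront size selects the lane, and wrapping to lane 0 grabs a fresh VGPR. A standalone model of that arithmetic (plain C++, not the LLVM code; the starting state of 56 lanes already used is made up):

// Standalone model of the lane accounting in allocateSGPRSpillToVGPR.
#include <cstdio>

int main() {
  const unsigned WaveSize = 64;        // lanes per VGPR
  unsigned NumVGPRSpillLanes = 56;     // lanes handed out so far (invented)
  unsigned NumSpillVGPRs = 1;          // spill VGPRs reserved so far

  unsigned SpillSize = 64;             // bytes; an SGPR_512-sized spill
  unsigned NumLanes = SpillSize / 4;   // 16 dword subregisters

  for (unsigned I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) {
    unsigned VGPRIndex = NumVGPRSpillLanes % WaveSize;
    if (VGPRIndex == 0)
      ++NumSpillVGPRs;                 // wrapped to lane 0: need a fresh VGPR
    std::printf("subreg %2u -> spill VGPR #%u, lane %2u\n", I,
                NumSpillVGPRs - 1, VGPRIndex);
  }
  // Subregs 0-7 land in lanes 56-63 of VGPR #0; subregs 8-15 spill over
  // into lanes 0-7 of VGPR #1.
}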
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index dc1f22ae60d..ec1d2c37115 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -134,7 +134,8 @@ public:
// FIXME: Make private
unsigned LDSWaveSpillSize;
unsigned PSInputEna;
- std::map<unsigned, unsigned> LaneVGPRs;
+
+
unsigned ScratchOffsetReg;
unsigned NumUserSGPRs;
unsigned NumSystemSGPRs;
@@ -195,12 +196,29 @@ public:
bool hasReg() { return VGPR != AMDGPU::NoRegister;}
};
- // SIMachineFunctionInfo definition
+private:
+ // SGPR->VGPR spilling support.
+ typedef std::pair<unsigned, unsigned> SpillRegMask;
+
+ // Track the (VGPR, lane) pair used for each subregister of an SGPR
+ // spilled to the frame index used as the key.
+ DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
+ unsigned NumVGPRSpillLanes = 0;
+ SmallVector<unsigned, 2> SpillVGPRs;
+
+public:
SIMachineFunctionInfo(const MachineFunction &MF);
- SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex,
- unsigned SubIdx);
+ ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
+ auto I = SGPRToVGPRSpills.find(FrameIndex);
+ return (I == SGPRToVGPRSpills.end()) ?
+ ArrayRef<SpilledReg>() : makeArrayRef(I->second);
+ }
+
+ bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
+ void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
+
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
unsigned getTIDReg() const { return TIDReg; };
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index a90fc28ced3..39324cbbcc0 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -24,12 +24,6 @@
using namespace llvm;
-static cl::opt<bool> EnableSpillSGPRToSMEM(
- "amdgpu-spill-sgpr-to-smem",
- cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
- cl::init(false));
-
-
static bool hasPressureSet(const int *PSets, unsigned PSetID) {
for (unsigned i = 0; PSets[i] != -1; ++i) {
if (PSets[i] == (int)PSetID)
@@ -49,9 +43,28 @@ void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
}
}
-SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo(),
- SGPRPressureSets(getNumRegPressureSets()),
- VGPRPressureSets(getNumRegPressureSets()) {
+static cl::opt<bool> EnableSpillSGPRToSMEM(
+ "amdgpu-spill-sgpr-to-smem",
+ cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
+ cl::init(false));
+
+static cl::opt<bool> EnableSpillSGPRToVGPR(
+ "amdgpu-spill-sgpr-to-vgpr",
+ cl::desc("Enable spilling VGPRs to SGPRs"),
+ cl::ReallyHidden,
+ cl::init(true));
+
+SIRegisterInfo::SIRegisterInfo(const SISubtarget &ST) :
+ AMDGPURegisterInfo(),
+ SGPRPressureSets(getNumRegPressureSets()),
+ VGPRPressureSets(getNumRegPressureSets()),
+ SpillSGPRToVGPR(false),
+ SpillSGPRToSMEM(false) {
+ if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
+ SpillSGPRToSMEM = true;
+ else if (EnableSpillSGPRToVGPR)
+ SpillSGPRToVGPR = true;
+
unsigned NumRegPressureSets = getNumRegPressureSets();
SGPRSetID = NumRegPressureSets;
@@ -557,11 +570,20 @@ static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
}
-void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
+bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
int Index,
- RegScavenger *RS) const {
+ RegScavenger *RS,
+ bool OnlyToVGPR) const {
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MBB->getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
+ = MFI->getSGPRToVGPRSpills(Index);
+ bool SpillToVGPR = !VGPRSpills.empty();
+ if (OnlyToVGPR && !SpillToVGPR)
+ return false;
+
MachineRegisterInfo &MRI = MF->getRegInfo();
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -570,10 +592,11 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
bool IsKill = MI->getOperand(0).isKill();
const DebugLoc &DL = MI->getDebugLoc();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
+ bool SpillToSMEM = spillSGPRToSMEM();
+ if (SpillToSMEM && OnlyToVGPR)
+ return false;
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
@@ -646,9 +669,9 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
continue;
}
- struct SIMachineFunctionInfo::SpilledReg Spill =
- MFI->getSpilledReg(MF, Index, i);
- if (Spill.hasReg()) {
+ if (SpillToVGPR) {
+ SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
+
BuildMI(*MBB, MI, DL,
TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
Spill.VGPR)
@@ -659,6 +682,10 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
// frame index, we should delete the frame index when all references to
// it are fixed.
} else {
+ // XXX - Can the spill to VGPR fail for some subregisters but not others?
+ if (OnlyToVGPR)
+ return false;
+
// Spill SGPR to a frame index.
// TODO: Should VI try to spill to VGPR and then spill to SMEM?
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -702,22 +729,33 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
MI->eraseFromParent();
MFI->addToSpilledSGPRs(NumSubRegs);
+ return true;
}
-void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
+bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
int Index,
- RegScavenger *RS) const {
+ RegScavenger *RS,
+ bool OnlyToVGPR) const {
MachineFunction *MF = MI->getParent()->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+
+ ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills
+ = MFI->getSGPRToVGPRSpills(Index);
+ bool SpillToVGPR = !VGPRSpills.empty();
+ if (OnlyToVGPR && !SpillToVGPR)
+ return false;
+
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
const DebugLoc &DL = MI->getDebugLoc();
unsigned SuperReg = MI->getOperand(0).getReg();
- bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
+ bool SpillToSMEM = spillSGPRToSMEM();
+ if (SpillToSMEM && OnlyToVGPR)
+ return false;
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
@@ -785,10 +823,8 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
continue;
}
- SIMachineFunctionInfo::SpilledReg Spill
- = MFI->getSpilledReg(MF, Index, i);
-
- if (Spill.hasReg()) {
+ if (SpillToVGPR) {
+ SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
auto MIB =
BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
SubReg)
@@ -798,6 +834,9 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
if (NumSubRegs > 1)
MIB.addReg(SuperReg, RegState::ImplicitDefine);
} else {
+ if (OnlyToVGPR)
+ return false;
+
// Restore SGPR from a stack slot.
// FIXME: We should use S_LOAD_DWORD here for VI.
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -832,6 +871,32 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
}
MI->eraseFromParent();
+ return true;
+}
+
+/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
+/// a VGPR and the stack slot can be safely eliminated when all other users are
+/// handled.
+bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
+ MachineBasicBlock::iterator MI,
+ int FI,
+ RegScavenger *RS) const {
+ switch (MI->getOpcode()) {
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S64_SAVE:
+ case AMDGPU::SI_SPILL_S32_SAVE:
+ return spillSGPR(MI, FI, RS, true);
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_S32_RESTORE:
+ return restoreSGPR(MI, FI, RS, true);
+ default:
+ llvm_unreachable("not an SGPR spill instruction");
+ }
}
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
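For context on the BuildMI calls above: V_WRITELANE_B32 and V_READLANE_B32 move one 32-bit scalar into and out of a single lane of a VGPR, which is why one VGPR can hold up to 64 spilled SGPR dwords per wave. A standalone model of those lane semantics (toy functions, not the instruction-building code itself):

// Standalone model of V_WRITELANE_B32 / V_READLANE_B32 semantics.
#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>

constexpr unsigned WaveSize = 64;
using VGPR = std::array<uint32_t, WaveSize>; // one 32-bit value per lane

// v_writelane_b32 vdst, ssrc, lane: store a scalar into one lane of a VGPR.
void writelane(VGPR &V, uint32_t SVal, unsigned Lane) {
  assert(Lane < WaveSize && "lane out of range");
  V[Lane] = SVal;
}

// v_readlane_b32 sdst, vsrc, lane: read one lane of a VGPR back to a scalar.
uint32_t readlane(const VGPR &V, unsigned Lane) {
  assert(Lane < WaveSize && "lane out of range");
  return V[Lane];
}

int main() {
  VGPR SpillVGPR{};
  writelane(SpillVGPR, 0xdeadbeefu, 3); // spill an SGPR value to lane 3
  std::printf("restored: 0x%x\n", (unsigned)readlane(SpillVGPR, 3));
}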
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index c95492ffd22..679ed229758 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -21,8 +21,8 @@
namespace llvm {
-class SISubtarget;
class MachineRegisterInfo;
+class SISubtarget;
class SIMachineFunctionInfo;
class SIRegisterInfo final : public AMDGPURegisterInfo {
@@ -31,13 +31,22 @@ private:
unsigned VGPRSetID;
BitVector SGPRPressureSets;
BitVector VGPRPressureSets;
+ bool SpillSGPRToVGPR;
+ bool SpillSGPRToSMEM;
void reserveRegisterTuples(BitVector &, unsigned Reg) const;
void classifyPressureSet(unsigned PSetID, unsigned Reg,
BitVector &PressureSets) const;
-
public:
- SIRegisterInfo();
+ SIRegisterInfo(const SISubtarget &ST);
+
+ bool spillSGPRToVGPR() const {
+ return SpillSGPRToVGPR;
+ }
+
+ bool spillSGPRToSMEM() const {
+ return SpillSGPRToSMEM;
+ }
/// Return the end register initially reserved for the scratch buffer in case
/// spilling is needed.
@@ -78,16 +87,22 @@ public:
const TargetRegisterClass *getPointerRegClass(
const MachineFunction &MF, unsigned Kind = 0) const override;
- void spillSGPR(MachineBasicBlock::iterator MI,
- int FI, RegScavenger *RS) const;
+ /// If \p OnlyToVGPR is true, this will only succeed if VGPR lanes have already been reserved for the spill; otherwise it returns false without emitting anything.
+ bool spillSGPR(MachineBasicBlock::iterator MI,
+ int FI, RegScavenger *RS,
+ bool OnlyToVGPR = false) const;
- void restoreSGPR(MachineBasicBlock::iterator MI,
- int FI, RegScavenger *RS) const;
+ bool restoreSGPR(MachineBasicBlock::iterator MI,
+ int FI, RegScavenger *RS,
+ bool OnlyToVGPR = false) const;
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS) const override;
+ bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,
+ int FI, RegScavenger *RS) const;
+
unsigned getHWRegIndex(unsigned Reg) const {
return getEncodingValue(Reg) & 0xff;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index fdf82a6d818..9fdd5afc3c4 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -325,7 +325,7 @@ def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
let AllocationPriority = 11;
}
-def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
+def SReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add SGPR_512)> {
// Requires 8 s_mov_b64 to copy
let CopyCost = 8;
let AllocationPriority = 12;