author     Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>   2019-07-11 21:54:13 +0000
committer  Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>   2019-07-11 21:54:13 +0000
commit     937ff6e701bac55ee0eecc575f21b03aa8fedb3b (patch)
tree       ab231945bd31fe9ffb9df412e2aa9375a3f0ed82 /llvm/lib/Target/AMDGPU
parent     18b78bfe9e6bdf630f2acbcf813d18cefed372cb (diff)
[AMDGPU] gfx908 agpr spilling
Differential Revision: https://reviews.llvm.org/D64594

llvm-svn: 365833
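In short, the patch adds SI_SPILL_A{32,64,128,512,1024} pseudos (plus the previously missing 1024-bit SGPR/VGPR spill variants), teaches storeRegToStackSlot/loadRegFromStackSlot to select them for AGPR register classes, and lets SILowerSGPRSpills map VGPR spill slots onto free AGPRs (and AGPR spills onto free VGPRs) so the stack slot can be removed entirely. The per-lane copy direction is chosen in the new spillVGPRtoAGPR helper. The snippet below is a minimal standalone sketch of that selection logic only; the enum and function names are invented for illustration and are not LLVM APIs.

// Minimal standalone sketch (not part of the patch): how spillVGPRtoAGPR
// picks between v_accvgpr_write_b32 and v_accvgpr_read_b32. "LaneIsVGPR"
// says whether the register reserved for this spill lane is a VGPR.
#include <cstdio>

enum Opc { ACCVGPR_WRITE, ACCVGPR_READ };  // stand-ins for the real opcodes

// Storing a VGPR value into an AGPR lane is a write; storing an AGPR value
// into a VGPR lane is a read; restores invert both cases.
static Opc pickCopyOpcode(bool IsStore, bool LaneIsVGPR) {
  return (IsStore ^ LaneIsVGPR) ? ACCVGPR_WRITE : ACCVGPR_READ;
}

int main() {
  const char *Names[] = {"v_accvgpr_write_b32", "v_accvgpr_read_b32"};
  for (bool IsStore : {true, false})
    for (bool LaneIsVGPR : {true, false})
      std::printf("store=%d lane_is_vgpr=%d -> %s\n", IsStore, LaneIsVGPR,
                  Names[pickCopyOpcode(IsStore, LaneIsVGPR)]);
}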
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp       |   2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp           |  89
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstructions.td         |  47
-rw-r--r--  llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp     |  44
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp |  75
-rw-r--r--  llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h   |  30
-rw-r--r--  llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp        | 125
7 files changed, 367 insertions(+), 45 deletions(-)
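For reference, the Size clamp the patch adds to the spill pseudos works out as follows. This is a worked example under the patch's own comment that the Size field is an unsigned char; the helper is illustrative, not LLVM code.

#include <cstdio>

// Mirrors the TableGen expression added by the patch:
//   MaxSize = (num_subregs * bytes_per_subreg) + 8
//   Size    = MaxSize <= 256 ? MaxSize : 252
// where num_subregs = RegClassBits / 32, and bytes_per_subreg is 8 for the
// VGPR pseudos and 16 for the AGPR pseudos (which may need an extra
// accvgpr copy per lane).
static unsigned spillPseudoSize(unsigned RegClassBits, unsigned BytesPerSubReg) {
  unsigned MaxSize = (RegClassBits / 32) * BytesPerSubReg + 8;
  return MaxSize <= 256 ? MaxSize : 252; // clamp to fit an unsigned char
}

int main() {
  std::printf("SI_SPILL_V512  : %u\n", spillPseudoSize(512, 8));   // 136
  std::printf("SI_SPILL_V1024 : %u\n", spillPseudoSize(1024, 8));  // 264 -> 252
  std::printf("SI_SPILL_A1024 : %u\n", spillPseudoSize(1024, 16)); // 520 -> 252
}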
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 4e968b67869..d73f2b4abae 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -913,7 +913,6 @@ static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
return true;
}
-
#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
Optional<int> FramePointerSaveIndex) {
@@ -947,6 +946,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
const SIRegisterInfo *TRI = ST.getRegisterInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ FuncInfo->removeDeadFrameIndices(MFI);
assert(allSGPRSpillsAreDead(MFI, None) &&
"SGPR spill should have been removed in SILowerSGPRSpills");
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 88d37992072..f7c23b3d9fb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -976,6 +976,8 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_S256_SAVE;
case 64:
return AMDGPU::SI_SPILL_S512_SAVE;
+ case 128:
+ return AMDGPU::SI_SPILL_S1024_SAVE;
default:
llvm_unreachable("unknown register size");
}
@@ -997,6 +999,25 @@ static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_V256_SAVE;
case 64:
return AMDGPU::SI_SPILL_V512_SAVE;
+ case 128:
+ return AMDGPU::SI_SPILL_V1024_SAVE;
+ default:
+ llvm_unreachable("unknown register size");
+ }
+}
+
+static unsigned getAGPRSpillSaveOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_A32_SAVE;
+ case 8:
+ return AMDGPU::SI_SPILL_A64_SAVE;
+ case 16:
+ return AMDGPU::SI_SPILL_A128_SAVE;
+ case 64:
+ return AMDGPU::SI_SPILL_A512_SAVE;
+ case 128:
+ return AMDGPU::SI_SPILL_A1024_SAVE;
default:
llvm_unreachable("unknown register size");
}
@@ -1055,17 +1076,22 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
return;
}
- assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
-
- unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
+ unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize)
+ : getVGPRSpillSaveOpcode(SpillSize);
MFI->setHasSpilledVGPRs();
- BuildMI(MBB, MI, DL, get(Opcode))
- .addReg(SrcReg, getKillRegState(isKill)) // data
- .addFrameIndex(FrameIndex) // addr
- .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
- .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
- .addImm(0) // offset
- .addMemOperand(MMO);
+
+ auto MIB = BuildMI(MBB, MI, DL, get(Opcode));
+ if (RI.hasAGPRs(RC)) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MIB.addReg(Tmp, RegState::Define);
+ }
+ MIB.addReg(SrcReg, getKillRegState(isKill)) // data
+ .addFrameIndex(FrameIndex) // addr
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
+ .addImm(0) // offset
+ .addMemOperand(MMO);
}
static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
@@ -1084,6 +1110,8 @@ static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_S256_RESTORE;
case 64:
return AMDGPU::SI_SPILL_S512_RESTORE;
+ case 128:
+ return AMDGPU::SI_SPILL_S1024_RESTORE;
default:
llvm_unreachable("unknown register size");
}
@@ -1105,6 +1133,25 @@ static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_V256_RESTORE;
case 64:
return AMDGPU::SI_SPILL_V512_RESTORE;
+ case 128:
+ return AMDGPU::SI_SPILL_V1024_RESTORE;
+ default:
+ llvm_unreachable("unknown register size");
+ }
+}
+
+static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMDGPU::SI_SPILL_A32_RESTORE;
+ case 8:
+ return AMDGPU::SI_SPILL_A64_RESTORE;
+ case 16:
+ return AMDGPU::SI_SPILL_A128_RESTORE;
+ case 64:
+ return AMDGPU::SI_SPILL_A512_RESTORE;
+ case 128:
+ return AMDGPU::SI_SPILL_A1024_RESTORE;
default:
llvm_unreachable("unknown register size");
}
@@ -1156,15 +1203,19 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}
- assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
-
- unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
- BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addFrameIndex(FrameIndex) // vaddr
- .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
- .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
- .addImm(0) // offset
- .addMemOperand(MMO);
+ unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
+ : getVGPRSpillRestoreOpcode(SpillSize);
+ auto MIB = BuildMI(MBB, MI, DL, get(Opcode), DestReg);
+ if (RI.hasAGPRs(RC)) {
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MIB.addReg(Tmp, RegState::Define);
+ }
+ MIB.addFrameIndex(FrameIndex) // vaddr
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
+ .addImm(0) // offset
+ .addMemOperand(MMO);
}
/// \param @Offset Offset in bytes of the FrameIndex being spilled
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 4831ede3d54..05fdd3065aa 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -513,6 +513,7 @@ defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
+defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let UseNamedOperandTable = 1, VGPRSpill = 1,
@@ -524,7 +525,9 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let mayStore = 1;
let mayLoad = 0;
// (2 * 4) + (8 * num_subregs) bytes maximum
- let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+ int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+ // Size field is unsigned char and cannot fit more.
+ let Size = !if(!le(MaxSize, 256), MaxSize, 252);
}
def _RESTORE : VPseudoInstSI <
@@ -535,7 +538,9 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let mayLoad = 1;
// (2 * 4) + (8 * num_subregs) bytes maximum
- let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+ int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+ // Size field is unsigned char and cannot fit more.
+ let Size = !if(!le(MaxSize, 256), MaxSize, 252);
}
} // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]
}
@@ -547,6 +552,44 @@ defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
+defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>;
+
+multiclass SI_SPILL_AGPR <RegisterClass vgpr_class> {
+ let UseNamedOperandTable = 1, VGPRSpill = 1,
+ Constraints = "@earlyclobber $tmp",
+ SchedRW = [WriteVMEM] in {
+ def _SAVE : VPseudoInstSI <
+ (outs VGPR_32:$tmp),
+ (ins vgpr_class:$vdata, i32imm:$vaddr, SReg_128:$srsrc,
+ SReg_32:$soffset, i32imm:$offset)> {
+ let mayStore = 1;
+ let mayLoad = 0;
+ // (2 * 4) + (16 * num_subregs) bytes maximum
+ int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 4), 8);
+ // Size field is unsigned char and cannot fit more.
+ let Size = !if(!le(MaxSize, 256), MaxSize, 252);
+ }
+
+ def _RESTORE : VPseudoInstSI <
+ (outs vgpr_class:$vdata, VGPR_32:$tmp),
+ (ins i32imm:$vaddr, SReg_128:$srsrc, SReg_32:$soffset,
+ i32imm:$offset)> {
+ let mayStore = 0;
+ let mayLoad = 1;
+
+ // (2 * 4) + (16 * num_subregs) bytes maximum
+ int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 4), 8);
+ // Size field is unsigned char and cannot fit more.
+ let Size = !if(!le(MaxSize, 256), MaxSize, 252);
+ }
+ } // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]
+}
+
+defm SI_SPILL_A32 : SI_SPILL_AGPR <AGPR_32>;
+defm SI_SPILL_A64 : SI_SPILL_AGPR <AReg_64>;
+defm SI_SPILL_A128 : SI_SPILL_AGPR <AReg_128>;
+defm SI_SPILL_A512 : SI_SPILL_AGPR <AReg_512>;
+defm SI_SPILL_A1024 : SI_SPILL_AGPR <AReg_1024>;
def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
(outs SReg_64:$dst),
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 7838a59b633..abfe89491e7 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -37,6 +37,12 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;
namespace {
+static cl::opt<bool> EnableSpillVGPRToAGPR(
+ "amdgpu-spill-vgpr-to-agpr",
+ cl::desc("Enable spilling VGPRs to AGPRs"),
+ cl::ReallyHidden,
+ cl::init(true));
+
class SILowerSGPRSpills : public MachineFunctionPass {
private:
const SIRegisterInfo *TRI = nullptr;
@@ -242,10 +248,22 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
return false;
}
+ MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+ bool AllSGPRSpilledToVGPRs = false;
+ const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
+ && EnableSpillVGPRToAGPR;
+
bool MadeChange = false;
- if (TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) {
+ const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
+
+ // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
+ // handled as SpilledToReg in regular PrologEpilogInserter.
+ if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
+ SpillVGPRToAGPR) {
+ AllSGPRSpilledToVGPRs = true;
+
// Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
// are spilled to VGPRs, in which case we can eliminate the stack usage.
//
@@ -257,6 +275,18 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
Next = std::next(I);
+ if (SpillToAGPR && TII->isVGPRSpill(MI)) {
+ // Try to eliminate stack used by VGPR spills before frame
+ // finalization.
+ unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::vaddr);
+ int FI = MI.getOperand(FIOp).getIndex();
+ unsigned VReg = TII->getNamedOperand(MI, AMDGPU::OpName::vdata)
+ ->getReg();
+ if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, TRI->isAGPR(MRI, VReg)))
+ TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
+ }
+
if (!TII->isSGPRSpill(MI))
continue;
@@ -266,18 +296,24 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
(void)Spilled;
assert(Spilled && "failed to spill SGPR to VGPR when allocated");
- }
-
+ } else
+ AllSGPRSpilledToVGPRs = false;
}
}
for (MachineBasicBlock &MBB : MF) {
for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
MBB.addLiveIn(SSpill.VGPR);
+
+ for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
+ MBB.addLiveIn(Reg);
+
+ for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
+ MBB.addLiveIn(Reg);
+
MBB.sortUniqueLiveIns();
}
- FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
MadeChange = true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index a3f6caaacc8..46da974a2f4 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -319,7 +319,75 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
return true;
}
-void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
+/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
+/// Either an AGPR is spilled to a VGPR or vice versa.
+/// Returns true if \p FI can be eliminated completely.
+bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
+ int FI,
+ bool isAGPRtoVGPR) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+ assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));
+
+ auto &Spill = VGPRToAGPRSpills[FI];
+
+ // This has already been allocated.
+ if (!Spill.Lanes.empty())
+ return Spill.FullyAllocated;
+
+ unsigned Size = FrameInfo.getObjectSize(FI);
+ unsigned NumLanes = Size / 4;
+ Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
+
+ const TargetRegisterClass &RC =
+ isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
+ auto Regs = RC.getRegisters();
+
+ auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ Spill.FullyAllocated = true;
+
+ // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
+ // once.
+ BitVector OtherUsedRegs;
+ OtherUsedRegs.resize(TRI->getNumRegs());
+
+ const uint32_t *CSRMask =
+ TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
+ if (CSRMask)
+ OtherUsedRegs.setBitsInMask(CSRMask);
+
+ // TODO: Should include register tuples, but doesn't matter with current
+ // usage.
+ for (MCPhysReg Reg : SpillAGPR)
+ OtherUsedRegs.set(Reg);
+ for (MCPhysReg Reg : SpillVGPR)
+ OtherUsedRegs.set(Reg);
+
+ SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
+ for (unsigned I = 0; I < NumLanes; ++I) {
+ NextSpillReg = std::find_if(
+ NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
+ return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
+ !OtherUsedRegs[Reg];
+ });
+
+ if (NextSpillReg == Regs.end()) { // Registers exhausted
+ Spill.FullyAllocated = false;
+ break;
+ }
+
+ OtherUsedRegs.set(*NextSpillReg);
+ SpillRegs.push_back(*NextSpillReg);
+ Spill.Lanes[I] = *NextSpillReg++;
+ }
+
+ return Spill.FullyAllocated;
+}
+
+void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
// The FP spill hasn't been inserted yet, so keep it around.
for (auto &R : SGPRToVGPRSpills) {
if (R.first != FramePointerSaveIndex)
@@ -332,6 +400,11 @@ void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI)
++i)
if (i != FramePointerSaveIndex)
MFI.setStackID(i, TargetStackID::Default);
+
+ for (auto &R : VGPRToAGPRSpills) {
+ if (R.second.FullyAllocated)
+ MFI.RemoveStackObject(R.first);
+ }
}
MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index a8928dacf77..f19b20ceb5d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -442,6 +442,11 @@ public:
SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
};
+ struct VGPRSpillToAGPR {
+ SmallVector<MCPhysReg, 32> Lanes;
+ bool FullyAllocated = false;
+ };
+
SparseBitVector<> WWMReservedRegs;
void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
@@ -456,6 +461,14 @@ private:
unsigned NumVGPRSpillLanes = 0;
SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
+ DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
+
+ // AGPRs used for VGPR spills.
+ SmallVector<MCPhysReg, 32> SpillAGPR;
+
+ // VGPRs used for AGPR spills.
+ SmallVector<MCPhysReg, 32> SpillVGPR;
+
public: // FIXME
/// If this is set, an SGPR used for save/restore of the register used for the
/// frame pointer.
@@ -477,6 +490,20 @@ public:
return SpillVGPRs;
}
+ ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
+ return SpillAGPR;
+ }
+
+ ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
+ return SpillVGPR;
+ }
+
+ MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
+ auto I = VGPRToAGPRSpills.find(FrameIndex);
+ return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
+ : I->second.Lanes[Lane];
+ }
+
AMDGPU::SIModeRegisterDefaults getMode() const {
return Mode;
}
@@ -484,7 +511,8 @@ public:
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
- void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
+ bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
+ void removeDeadFrameIndices(MachineFrameInfo &MFI);
bool hasCalculatedTID() const { return TIDReg != 0; };
unsigned getTIDReg() const { return TIDReg; };
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 9fde16edade..7c2839ccb4c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -256,6 +256,13 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, Reg);
}
+ // FIXME: Stop using reserved registers for this.
+ for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
+ reserveRegisterTuples(Reserved, Reg);
+
+ for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
+ reserveRegisterTuples(Reserved, Reg);
+
return Reserved;
}
@@ -448,10 +455,19 @@ const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
static unsigned getNumSubRegsForSpillOp(unsigned Op) {
switch (Op) {
+ case AMDGPU::SI_SPILL_S1024_SAVE:
+ case AMDGPU::SI_SPILL_S1024_RESTORE:
+ case AMDGPU::SI_SPILL_V1024_SAVE:
+ case AMDGPU::SI_SPILL_V1024_RESTORE:
+ case AMDGPU::SI_SPILL_A1024_SAVE:
+ case AMDGPU::SI_SPILL_A1024_RESTORE:
+ return 32;
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V512_RESTORE:
+ case AMDGPU::SI_SPILL_A512_SAVE:
+ case AMDGPU::SI_SPILL_A512_RESTORE:
return 16;
case AMDGPU::SI_SPILL_S256_SAVE:
case AMDGPU::SI_SPILL_S256_RESTORE:
@@ -467,6 +483,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_S128_RESTORE:
case AMDGPU::SI_SPILL_V128_SAVE:
case AMDGPU::SI_SPILL_V128_RESTORE:
+ case AMDGPU::SI_SPILL_A128_SAVE:
+ case AMDGPU::SI_SPILL_A128_RESTORE:
return 4;
case AMDGPU::SI_SPILL_S96_SAVE:
case AMDGPU::SI_SPILL_S96_RESTORE:
@@ -477,11 +495,15 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_S64_RESTORE:
case AMDGPU::SI_SPILL_V64_SAVE:
case AMDGPU::SI_SPILL_V64_RESTORE:
+ case AMDGPU::SI_SPILL_A64_SAVE:
+ case AMDGPU::SI_SPILL_A64_RESTORE:
return 2;
case AMDGPU::SI_SPILL_S32_SAVE:
case AMDGPU::SI_SPILL_S32_RESTORE:
case AMDGPU::SI_SPILL_V32_SAVE:
case AMDGPU::SI_SPILL_V32_RESTORE:
+ case AMDGPU::SI_SPILL_A32_SAVE:
+ case AMDGPU::SI_SPILL_A32_RESTORE:
return 1;
default: llvm_unreachable("Invalid spill opcode");
}
@@ -541,6 +563,35 @@ static int getOffsetMUBUFLoad(unsigned Opc) {
}
}
+static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
+ int Index,
+ unsigned Lane,
+ unsigned ValueReg,
+ bool IsKill) {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction *MF = MI->getParent()->getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
+
+ if (Reg == AMDGPU::NoRegister)
+ return MachineInstrBuilder();
+
+ bool IsStore = MI->mayStore();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
+
+ unsigned Dst = IsStore ? Reg : ValueReg;
+ unsigned Src = IsStore ? ValueReg : Reg;
+ unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
+ : AMDGPU::V_ACCVGPR_READ_B32;
+
+ return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
+ .addReg(Src, getKillRegState(IsKill));
+}
+
// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
// need to handle the case where an SGPR may need to be spilled while spilling.
static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
@@ -559,6 +610,9 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
return false;
const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
+ if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr())
+ return true;
+
MachineInstrBuilder NewMI =
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.add(*Reg)
@@ -611,6 +665,10 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned Align = MFI.getObjectAlignment(Index);
const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
+ Register TmpReg =
+ hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
+ : Register();
+
assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
if (!isUInt<12>(Offset + Size - EltSize)) {
@@ -659,21 +717,38 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
SrcDstRegState |= getKillRegState(IsKill);
}
- MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
- MachineMemOperand *NewMMO
- = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
- EltSize, MinAlign(Align, EltSize * i));
-
- auto MIB = BuildMI(*MBB, MI, DL, Desc)
- .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
- .addReg(ScratchRsrcReg)
- .addReg(SOffset, SOffsetRegState)
- .addImm(Offset)
- .addImm(0) // glc
- .addImm(0) // slc
- .addImm(0) // tfe
- .addImm(0) // dlc
- .addMemOperand(NewMMO);
+ auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill);
+
+ if (!MIB.getInstr()) {
+ unsigned FinalReg = SubReg;
+ if (TmpReg != AMDGPU::NoRegister) {
+ if (IsStore)
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
+ .addReg(SubReg, getKillRegState(IsKill));
+ SubReg = TmpReg;
+ }
+
+ MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
+ MachineMemOperand *NewMMO
+ = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
+ EltSize, MinAlign(Align, EltSize * i));
+
+ MIB = BuildMI(*MBB, MI, DL, Desc)
+ .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
+ .addReg(ScratchRsrcReg)
+ .addReg(SOffset, SOffsetRegState)
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .addImm(0) // dlc
+ .addMemOperand(NewMMO);
+
+ if (!IsStore && TmpReg != AMDGPU::NoRegister)
+ MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
+ FinalReg)
+ .addReg(TmpReg, RegState::Kill);
+ }
if (NumSubRegs > 1)
MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
@@ -1038,6 +1113,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
int FI,
RegScavenger *RS) const {
switch (MI->getOpcode()) {
+ case AMDGPU::SI_SPILL_S1024_SAVE:
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S256_SAVE:
case AMDGPU::SI_SPILL_S160_SAVE:
@@ -1046,6 +1122,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S32_SAVE:
return spillSGPR(MI, FI, RS, true);
+ case AMDGPU::SI_SPILL_S1024_RESTORE:
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
case AMDGPU::SI_SPILL_S160_RESTORE:
@@ -1080,6 +1157,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
switch (MI->getOpcode()) {
// SGPR register spill
+ case AMDGPU::SI_SPILL_S1024_SAVE:
case AMDGPU::SI_SPILL_S512_SAVE:
case AMDGPU::SI_SPILL_S256_SAVE:
case AMDGPU::SI_SPILL_S160_SAVE:
@@ -1092,6 +1170,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
// SGPR register restore
+ case AMDGPU::SI_SPILL_S1024_RESTORE:
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
case AMDGPU::SI_SPILL_S160_RESTORE:
@@ -1104,13 +1183,19 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
}
// VGPR register spill
+ case AMDGPU::SI_SPILL_V1024_SAVE:
case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V160_SAVE:
case AMDGPU::SI_SPILL_V128_SAVE:
case AMDGPU::SI_SPILL_V96_SAVE:
case AMDGPU::SI_SPILL_V64_SAVE:
- case AMDGPU::SI_SPILL_V32_SAVE: {
+ case AMDGPU::SI_SPILL_V32_SAVE:
+ case AMDGPU::SI_SPILL_A1024_SAVE:
+ case AMDGPU::SI_SPILL_A512_SAVE:
+ case AMDGPU::SI_SPILL_A128_SAVE:
+ case AMDGPU::SI_SPILL_A64_SAVE:
+ case AMDGPU::SI_SPILL_A32_SAVE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata);
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
@@ -1134,7 +1219,13 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V128_RESTORE:
case AMDGPU::SI_SPILL_V160_RESTORE:
case AMDGPU::SI_SPILL_V256_RESTORE:
- case AMDGPU::SI_SPILL_V512_RESTORE: {
+ case AMDGPU::SI_SPILL_V512_RESTORE:
+ case AMDGPU::SI_SPILL_V1024_RESTORE:
+ case AMDGPU::SI_SPILL_A32_RESTORE:
+ case AMDGPU::SI_SPILL_A64_RESTORE:
+ case AMDGPU::SI_SPILL_A128_RESTORE:
+ case AMDGPU::SI_SPILL_A512_RESTORE:
+ case AMDGPU::SI_SPILL_A1024_RESTORE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata);
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==