summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2016-03-04 18:31:18 +0000
committerTom Stellard <thomas.stellard@amd.com>2016-03-04 18:31:18 +0000
commit649b5db557d5c7005e2f2ca9d893377bd733dc2e (patch)
treeded01c957b5dbae57a0574c516cb2d0584682760 /llvm/lib
parent3b8f6126ac50880a17ea71fb82c828c9dc818d81 (diff)
downloadbcm5719-llvm-649b5db557d5c7005e2f2ca9d893377bd733dc2e.tar.gz
bcm5719-llvm-649b5db557d5c7005e2f2ca9d893377bd733dc2e.zip
AMDGPU/SI: Add support for spiling SGPRs to scratch buffer
Summary: This is necessary for when we run out of VGPRs and can no longer use v_{read,write}_lane for spilling SGPRs. Reviewers: arsenm Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D17592 llvm-svn: 262732
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td5
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp15
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp86
5 files changed, 81 insertions, 30 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5c9e814088a..bbc19fdc715 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -590,6 +590,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex) // frame_idx
.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
.addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
+ .addImm(0) // offset
.addMemOperand(MMO);
}
@@ -672,6 +673,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FrameIndex) // frame_idx
.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
.addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
+ .addImm(0) // offset
.addMemOperand(MMO);
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 7f9f21c5862..471a9e54203 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2029,7 +2029,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
def _SAVE : InstSI <
(outs),
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
- SReg_32:$scratch_offset),
+ SReg_32:$scratch_offset, i32imm:$offset),
"", []> {
let mayStore = 1;
let mayLoad = 0;
@@ -2037,7 +2037,8 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
def _RESTORE : InstSI <
(outs vgpr_class:$dst),
- (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
+ (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset,
+ i32imm:$offset),
"", []> {
let mayStore = 0;
let mayLoad = 1;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index c5ecfd0ac73..6b8d2566597 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -162,7 +162,7 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
MachineFunction *MF,
unsigned FrameIndex,
unsigned SubIdx) {
- const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ MachineFrameInfo *FrameInfo = MF->getFrameInfo();
const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo());
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -173,19 +173,15 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
unsigned Lane = (Offset / 4) % 64;
struct SpilledReg Spill;
+ Spill.Lane = Lane;
if (!LaneVGPRs.count(LaneVGPRIdx)) {
unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
- if (LaneVGPR == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("Ran out of VGPRs for spilling SGPR");
+ if (LaneVGPR == AMDGPU::NoRegister)
+ // We have no VGPRs left for spilling SGPRs.
+ return Spill;
- // When compiling from inside Mesa, the compilation continues.
- // Select an arbitrary register to avoid triggering assertions
- // during subsequent passes.
- LaneVGPR = AMDGPU::VGPR0;
- }
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
@@ -198,7 +194,6 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
}
Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
- Spill.Lane = Lane;
return Spill;
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 787b3bb7a75..2f4e494faaf 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -113,8 +113,9 @@ public:
unsigned VGPR;
int Lane;
SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { }
- SpilledReg() : VGPR(0), Lane(-1) { }
+ SpilledReg() : VGPR(AMDGPU::NoRegister), Lane(-1) { }
bool hasLane() { return Lane != -1;}
+ bool hasReg() { return VGPR != AMDGPU::NoRegister;}
};
// SIMachineFunctionInfo definition
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 80b446939e4..199025aec26 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -307,6 +307,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S32_SAVE: {
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
@@ -314,15 +315,37 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
- BuildMI(*MBB, MI, DL,
- TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
- Spill.VGPR)
- .addReg(SubReg)
- .addImm(Spill.Lane);
-
- // FIXME: Since this spills to another register instead of an actual
- // frame index, we should delete the frame index when all references to
- // it are fixed.
+ if (Spill.hasReg()) {
+ BuildMI(*MBB, MI, DL,
+ TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
+ Spill.VGPR)
+ .addReg(SubReg)
+ .addImm(Spill.Lane);
+
+ // FIXME: Since this spills to another register instead of an actual
+ // frame index, we should delete the frame index when all references to
+ // it are fixed.
+ } else {
+ // Spill SGPR to a frame index.
+ // FIXME we should use S_STORE_DWORD here for VI.
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ .addReg(SubReg);
+
+ unsigned Size = FrameInfo->getObjectSize(Index);
+ unsigned Align = FrameInfo->getObjectAlignment(Index);
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getFixedStack(*MF, Index);
+ MachineMemOperand *MMO
+ = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ Size, Align);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
+ .addReg(TmpReg) // src
+ .addFrameIndex(Index) // frame_idx
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
+ .addImm(i * 4) // offset
+ .addMemOperand(MMO);
+ }
}
MI->eraseFromParent();
break;
@@ -335,6 +358,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_S64_RESTORE:
case AMDGPU::SI_SPILL_S32_RESTORE: {
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
@@ -342,12 +366,38 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
struct SIMachineFunctionInfo::SpilledReg Spill =
MFI->getSpilledReg(MF, Index, i);
- BuildMI(*MBB, MI, DL,
- TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
- SubReg)
- .addReg(Spill.VGPR)
- .addImm(Spill.Lane)
- .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
+ if (Spill.hasReg()) {
+ BuildMI(*MBB, MI, DL,
+ TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+ SubReg)
+ .addReg(Spill.VGPR)
+ .addImm(Spill.Lane)
+ .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
+ } else {
+ // Restore SGPR from a stack slot.
+ // FIXME: We should use S_LOAD_DWORD here for VI.
+
+ unsigned Align = FrameInfo->getObjectAlignment(Index);
+ unsigned Size = FrameInfo->getObjectSize(Index);
+
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getFixedStack(*MF, Index);
+
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad, Size, Align);
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
+ .addFrameIndex(Index) // frame_idx
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
+ .addImm(i * 4) // offset
+ .addMemOperand(MMO);
+ BuildMI(*MBB, MI, DL,
+ TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg)
+ .addReg(TmpReg)
+ .addImm(0)
+ .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
+ }
}
// TODO: only do this when it is needed
@@ -381,7 +431,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
- FrameInfo->getObjectOffset(Index));
+ FrameInfo->getObjectOffset(Index) +
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm());
MI->eraseFromParent();
break;
case AMDGPU::SI_SPILL_V32_RESTORE:
@@ -394,7 +445,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
- FrameInfo->getObjectOffset(Index));
+ FrameInfo->getObjectOffset(Index) +
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm());
MI->eraseFromParent();
break;
}
OpenPOWER on IntegriCloud