diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2016-03-04 18:31:18 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2016-03-04 18:31:18 +0000 |
commit | 649b5db557d5c7005e2f2ca9d893377bd733dc2e (patch) | |
tree | ded01c957b5dbae57a0574c516cb2d0584682760 /llvm/lib | |
parent | 3b8f6126ac50880a17ea71fb82c828c9dc818d81 (diff) | |
download | bcm5719-llvm-649b5db557d5c7005e2f2ca9d893377bd733dc2e.tar.gz bcm5719-llvm-649b5db557d5c7005e2f2ca9d893377bd733dc2e.zip |
AMDGPU/SI: Add support for spilling SGPRs to scratch buffer
Summary:
This is necessary for when we run out of VGPRs and can no
longer use v_{read,write}_lane for spilling SGPRs.
Reviewers: arsenm
Subscribers: arsenm, llvm-commits
Differential Revision: http://reviews.llvm.org/D17592
llvm-svn: 262732
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 15 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 86 |
5 files changed, 81 insertions, 30 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 5c9e814088a..bbc19fdc715 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -590,6 +590,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIndex) // frame_idx .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset + .addImm(0) // offset .addMemOperand(MMO); } @@ -672,6 +673,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FrameIndex) // frame_idx .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset + .addImm(0) // offset .addMemOperand(MMO); } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 7f9f21c5862..471a9e54203 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2029,7 +2029,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> { def _SAVE : InstSI < (outs), (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, - SReg_32:$scratch_offset), + SReg_32:$scratch_offset, i32imm:$offset), "", []> { let mayStore = 1; let mayLoad = 0; @@ -2037,7 +2037,8 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> { def _RESTORE : InstSI < (outs vgpr_class:$dst), - (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), + (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset, + i32imm:$offset), "", []> { let mayStore = 0; let mayLoad = 1; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index c5ecfd0ac73..6b8d2566597 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -162,7 +162,7 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( MachineFunction 
*MF, unsigned FrameIndex, unsigned SubIdx) { - const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + MachineFrameInfo *FrameInfo = MF->getFrameInfo(); const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>( MF->getSubtarget<AMDGPUSubtarget>().getRegisterInfo()); MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -173,19 +173,15 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( unsigned Lane = (Offset / 4) % 64; struct SpilledReg Spill; + Spill.Lane = Lane; if (!LaneVGPRs.count(LaneVGPRIdx)) { unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass); - if (LaneVGPR == AMDGPU::NoRegister) { - LLVMContext &Ctx = MF->getFunction()->getContext(); - Ctx.emitError("Ran out of VGPRs for spilling SGPR"); + if (LaneVGPR == AMDGPU::NoRegister) + // We have no VGPRs left for spilling SGPRs. + return Spill; - // When compiling from inside Mesa, the compilation continues. - // Select an arbitrary register to avoid triggering assertions - // during subsequent passes. 
- LaneVGPR = AMDGPU::VGPR0; - } LaneVGPRs[LaneVGPRIdx] = LaneVGPR; @@ -198,7 +194,6 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( } Spill.VGPR = LaneVGPRs[LaneVGPRIdx]; - Spill.Lane = Lane; return Spill; } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 787b3bb7a75..2f4e494faaf 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -113,8 +113,9 @@ public: unsigned VGPR; int Lane; SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) { } - SpilledReg() : VGPR(0), Lane(-1) { } + SpilledReg() : VGPR(AMDGPU::NoRegister), Lane(-1) { } bool hasLane() { return Lane != -1;} + bool hasReg() { return VGPR != AMDGPU::NoRegister;} }; // SIMachineFunctionInfo definition diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 80b446939e4..199025aec26 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -307,6 +307,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_S64_SAVE: case AMDGPU::SI_SPILL_S32_SAVE: { unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); + unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(), @@ -314,15 +315,37 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, struct SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i); - BuildMI(*MBB, MI, DL, - TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), - Spill.VGPR) - .addReg(SubReg) - .addImm(Spill.Lane); - - // FIXME: Since this spills to another register instead of an actual - // frame index, we should delete the frame index when all references to - // it are fixed. 
+ if (Spill.hasReg()) { + BuildMI(*MBB, MI, DL, + TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), + Spill.VGPR) + .addReg(SubReg) + .addImm(Spill.Lane); + + // FIXME: Since this spills to another register instead of an actual + // frame index, we should delete the frame index when all references to + // it are fixed. + } else { + // Spill SGPR to a frame index. + // FIXME we should use S_STORE_DWORD here for VI. + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg) + .addReg(SubReg); + + unsigned Size = FrameInfo->getObjectSize(Index); + unsigned Align = FrameInfo->getObjectAlignment(Index); + MachinePointerInfo PtrInfo + = MachinePointerInfo::getFixedStack(*MF, Index); + MachineMemOperand *MMO + = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + Size, Align); + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE)) + .addReg(TmpReg) // src + .addFrameIndex(Index) // frame_idx + .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc + .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset + .addImm(i * 4) // offset + .addMemOperand(MMO); + } } MI->eraseFromParent(); break; @@ -335,6 +358,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, case AMDGPU::SI_SPILL_S64_RESTORE: case AMDGPU::SI_SPILL_S32_RESTORE: { unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); + unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(), @@ -342,12 +366,38 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, struct SIMachineFunctionInfo::SpilledReg Spill = MFI->getSpilledReg(MF, Index, i); - BuildMI(*MBB, MI, DL, - TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), - SubReg) - .addReg(Spill.VGPR) - .addImm(Spill.Lane) - .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine); + if (Spill.hasReg()) { + BuildMI(*MBB, MI, DL, + 
TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), + SubReg) + .addReg(Spill.VGPR) + .addImm(Spill.Lane) + .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine); + } else { + // Restore SGPR from a stack slot. + // FIXME: We should use S_LOAD_DWORD here for VI. + + unsigned Align = FrameInfo->getObjectAlignment(Index); + unsigned Size = FrameInfo->getObjectSize(Index); + + MachinePointerInfo PtrInfo + = MachinePointerInfo::getFixedStack(*MF, Index); + + MachineMemOperand *MMO = MF->getMachineMemOperand( + PtrInfo, MachineMemOperand::MOLoad, Size, Align); + + BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg) + .addFrameIndex(Index) // frame_idx + .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc + .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset + .addImm(i * 4) // offset + .addMemOperand(MMO); + BuildMI(*MBB, MI, DL, + TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32), SubReg) + .addReg(TmpReg) + .addImm(0) + .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine); + } } // TODO: only do this when it is needed @@ -381,7 +431,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(), - FrameInfo->getObjectOffset(Index)); + FrameInfo->getObjectOffset(Index) + + TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm()); MI->eraseFromParent(); break; case AMDGPU::SI_SPILL_V32_RESTORE: @@ -394,7 +445,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(), TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(), - FrameInfo->getObjectOffset(Index)); + FrameInfo->getObjectOffset(Index) + + TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm()); 
MI->eraseFromParent(); break; } |