| field | value | date |
|---|---|---|
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-07-11 21:54:13 +0000 |
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-07-11 21:54:13 +0000 |
| commit | 937ff6e701bac55ee0eecc575f21b03aa8fedb3b (patch) | |
| tree | ab231945bd31fe9ffb9df412e2aa9375a3f0ed82 /llvm/lib/Target/AMDGPU | |
| parent | 18b78bfe9e6bdf630f2acbcf813d18cefed372cb (diff) | |
| download | bcm5719-llvm-937ff6e701bac55ee0eecc575f21b03aa8fedb3b.tar.gz, bcm5719-llvm-937ff6e701bac55ee0eecc575f21b03aa8fedb3b.zip | |
[AMDGPU] gfx908 agpr spilling
Differential Revision: https://reviews.llvm.org/D64594
llvm-svn: 365833
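In brief: gfx908 accumulation registers (AGPRs) have no direct path to scratch memory, so the new SI_SPILL_A*_SAVE/RESTORE pseudos carry an extra $tmp VGPR operand and the lowering moves data through it with v_accvgpr_read_b32 / v_accvgpr_write_b32; when a free register of the opposite file is available, the spill is instead remapped to it lane by lane and the stack slot is dropped. The self-contained C++ sketch below (illustrative names and enum, not LLVM's API) mirrors the copy-direction test used by the spillVGPRtoAGPR() helper added in SIRegisterInfo.cpp.

```cpp
// Illustrative, standalone sketch -- not LLVM code. It reproduces the
// opcode-direction logic of spillVGPRtoAGPR() from this patch: when a spill
// slot lane has been remapped to a register of the opposite file, a single
// accvgpr copy replaces the memory access.
#include <cstdio>

enum CopyOpcode { V_ACCVGPR_WRITE_B32 /* VGPR -> AGPR */,
                  V_ACCVGPR_READ_B32  /* AGPR -> VGPR */ };

// IsStore:       the pseudo being lowered is a spill save (value -> slot).
// SlotRegIsVGPR: the register backing the slot lane is a VGPR (true) or an
//                AGPR (false).
static CopyOpcode pickCopyOpcode(bool IsStore, bool SlotRegIsVGPR) {
  // The destination is an AGPR exactly when storing into an AGPR-backed slot
  // or reloading an AGPR value from a VGPR-backed slot, i.e. when
  // IsStore ^ SlotRegIsVGPR holds -- the same XOR the patch uses.
  return (IsStore ^ SlotRegIsVGPR) ? V_ACCVGPR_WRITE_B32 : V_ACCVGPR_READ_B32;
}

int main() {
  // VGPR spill save whose slot lane was remapped to an AGPR: write a0, v0.
  std::printf("%d\n", pickCopyOpcode(true, false) == V_ACCVGPR_WRITE_B32);
  // VGPR spill reload from that AGPR lane: read v0, a0.
  std::printf("%d\n", pickCopyOpcode(false, false) == V_ACCVGPR_READ_B32);
  return 0;
}
```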
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 2 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 89 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 47 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 44 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 75 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 30 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 125 |
7 files changed, 367 insertions, 45 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 4e968b67869..d73f2b4abae 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -913,7 +913,6 @@ static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
   return true;
 }
 
-
 #ifndef NDEBUG
 static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
                                  Optional<int> FramePointerSaveIndex) {
@@ -947,6 +946,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
   const SIRegisterInfo *TRI = ST.getRegisterInfo();
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
 
+  FuncInfo->removeDeadFrameIndices(MFI);
   assert(allSGPRSpillsAreDead(MFI, None) &&
          "SGPR spill should have been removed in SILowerSGPRSpills");
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 88d37992072..f7c23b3d9fb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -976,6 +976,8 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_S256_SAVE;
   case 64:
     return AMDGPU::SI_SPILL_S512_SAVE;
+  case 128:
+    return AMDGPU::SI_SPILL_S1024_SAVE;
   default:
     llvm_unreachable("unknown register size");
   }
@@ -997,6 +999,25 @@ static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_V256_SAVE;
   case 64:
     return AMDGPU::SI_SPILL_V512_SAVE;
+  case 128:
+    return AMDGPU::SI_SPILL_V1024_SAVE;
+  default:
+    llvm_unreachable("unknown register size");
+  }
+}
+
+static unsigned getAGPRSpillSaveOpcode(unsigned Size) {
+  switch (Size) {
+  case 4:
+    return AMDGPU::SI_SPILL_A32_SAVE;
+  case 8:
+    return AMDGPU::SI_SPILL_A64_SAVE;
+  case 16:
+    return AMDGPU::SI_SPILL_A128_SAVE;
+  case 64:
+    return AMDGPU::SI_SPILL_A512_SAVE;
+  case 128:
+    return AMDGPU::SI_SPILL_A1024_SAVE;
   default:
     llvm_unreachable("unknown register size");
   }
@@ -1055,17 +1076,22 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
     return;
   }
 
-  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
-
-  unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
+  unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize)
+                                    : getVGPRSpillSaveOpcode(SpillSize);
   MFI->setHasSpilledVGPRs();
-  BuildMI(MBB, MI, DL, get(Opcode))
-    .addReg(SrcReg, getKillRegState(isKill)) // data
-    .addFrameIndex(FrameIndex)               // addr
-    .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
-    .addReg(MFI->getStackPtrOffsetReg())     // scratch_offset
-    .addImm(0)                               // offset
-    .addMemOperand(MMO);
+
+  auto MIB = BuildMI(MBB, MI, DL, get(Opcode));
+  if (RI.hasAGPRs(RC)) {
+    MachineRegisterInfo &MRI = MF->getRegInfo();
+    unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    MIB.addReg(Tmp, RegState::Define);
+  }
+  MIB.addReg(SrcReg, getKillRegState(isKill)) // data
+     .addFrameIndex(FrameIndex)               // addr
+     .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
+     .addReg(MFI->getStackPtrOffsetReg())     // scratch_offset
+     .addImm(0)                               // offset
+     .addMemOperand(MMO);
 }
 
 static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
@@ -1084,6 +1110,8 @@ static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_S256_RESTORE;
   case 64:
     return AMDGPU::SI_SPILL_S512_RESTORE;
+  case 128:
+    return AMDGPU::SI_SPILL_S1024_RESTORE;
   default:
     llvm_unreachable("unknown register size");
   }
@@ -1105,6 +1133,25 @@ static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
     return AMDGPU::SI_SPILL_V256_RESTORE;
   case 64:
     return AMDGPU::SI_SPILL_V512_RESTORE;
+  case 128:
+    return AMDGPU::SI_SPILL_V1024_RESTORE;
+  default:
+    llvm_unreachable("unknown register size");
+  }
+}
+
+static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
+  switch (Size) {
+  case 4:
+    return AMDGPU::SI_SPILL_A32_RESTORE;
+  case 8:
+    return AMDGPU::SI_SPILL_A64_RESTORE;
+  case 16:
+    return AMDGPU::SI_SPILL_A128_RESTORE;
+  case 64:
+    return AMDGPU::SI_SPILL_A512_RESTORE;
+  case 128:
+    return AMDGPU::SI_SPILL_A1024_RESTORE;
   default:
     llvm_unreachable("unknown register size");
   }
@@ -1156,15 +1203,19 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     return;
   }
 
-  assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
-
-  unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
-  BuildMI(MBB, MI, DL, get(Opcode), DestReg)
-    .addFrameIndex(FrameIndex)           // vaddr
-    .addReg(MFI->getScratchRSrcReg())    // scratch_rsrc
-    .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
-    .addImm(0)                           // offset
-    .addMemOperand(MMO);
+  unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
+                                    : getVGPRSpillRestoreOpcode(SpillSize);
+  auto MIB = BuildMI(MBB, MI, DL, get(Opcode), DestReg);
+  if (RI.hasAGPRs(RC)) {
+    MachineRegisterInfo &MRI = MF->getRegInfo();
+    unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    MIB.addReg(Tmp, RegState::Define);
+  }
+  MIB.addFrameIndex(FrameIndex)           // vaddr
+     .addReg(MFI->getScratchRSrcReg())    // scratch_rsrc
+     .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
+     .addImm(0)                           // offset
+     .addMemOperand(MMO);
 }
 
 /// \param @Offset Offset in bytes of the FrameIndex being spilled
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 4831ede3d54..05fdd3065aa 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -513,6 +513,7 @@ defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
 defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
 defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
 defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
+defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;
 
 multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
   let UseNamedOperandTable = 1, VGPRSpill = 1,
@@ -524,7 +525,9 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
     let mayStore = 1;
     let mayLoad = 0;
     // (2 * 4) + (8 * num_subregs) bytes maximum
-    let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+    int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+    // Size field is unsigned char and cannot fit more.
+    let Size = !if(!le(MaxSize, 256), MaxSize, 252);
   }
 
   def _RESTORE : VPseudoInstSI <
@@ -535,7 +538,9 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
     let mayLoad = 1;
 
     // (2 * 4) + (8 * num_subregs) bytes maximum
-    let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+    int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
+    // Size field is unsigned char and cannot fit more.
+    let Size = !if(!le(MaxSize, 256), MaxSize, 252);
   }
 } // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]
 }
@@ -547,6 +552,44 @@ defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
 defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
 defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
 defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
+defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>;
+
+multiclass SI_SPILL_AGPR <RegisterClass vgpr_class> {
+  let UseNamedOperandTable = 1, VGPRSpill = 1,
+      Constraints = "@earlyclobber $tmp",
+      SchedRW = [WriteVMEM] in {
+    def _SAVE : VPseudoInstSI <
+      (outs VGPR_32:$tmp),
+      (ins vgpr_class:$vdata, i32imm:$vaddr, SReg_128:$srsrc,
+           SReg_32:$soffset, i32imm:$offset)> {
+      let mayStore = 1;
+      let mayLoad = 0;
+      // (2 * 4) + (16 * num_subregs) bytes maximum
+      int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 4), 8);
+      // Size field is unsigned char and cannot fit more.
+      let Size = !if(!le(MaxSize, 256), MaxSize, 252);
+    }
+
+    def _RESTORE : VPseudoInstSI <
+      (outs vgpr_class:$vdata, VGPR_32:$tmp),
+      (ins i32imm:$vaddr, SReg_128:$srsrc, SReg_32:$soffset,
+           i32imm:$offset)> {
+      let mayStore = 0;
+      let mayLoad = 1;
+
+      // (2 * 4) + (16 * num_subregs) bytes maximum
+      int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 4), 8);
+      // Size field is unsigned char and cannot fit more.
+      let Size = !if(!le(MaxSize, 256), MaxSize, 252);
+    }
+  } // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]
+}
+
+defm SI_SPILL_A32  : SI_SPILL_AGPR <AGPR_32>;
+defm SI_SPILL_A64  : SI_SPILL_AGPR <AReg_64>;
+defm SI_SPILL_A128 : SI_SPILL_AGPR <AReg_128>;
+defm SI_SPILL_A512 : SI_SPILL_AGPR <AReg_512>;
+defm SI_SPILL_A1024 : SI_SPILL_AGPR <AReg_1024>;
 
 def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
   (outs SReg_64:$dst),
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 7838a59b633..abfe89491e7 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -37,6 +37,12 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;
 
 namespace {
 
+static cl::opt<bool> EnableSpillVGPRToAGPR(
+  "amdgpu-spill-vgpr-to-agpr",
+  cl::desc("Enable spilling VGPRs to AGPRs"),
+  cl::ReallyHidden,
+  cl::init(true));
+
 class SILowerSGPRSpills : public MachineFunctionPass {
 private:
   const SIRegisterInfo *TRI = nullptr;
@@ -242,10 +248,22 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
     return false;
   }
 
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
 
+  bool AllSGPRSpilledToVGPRs = false;
+  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
+    && EnableSpillVGPRToAGPR;
+
   bool MadeChange = false;
 
-  if (TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) {
+  const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
+
+  // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
+  // handled as SpilledToReg in regular PrologEpilogInserter.
+  if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
+      SpillVGPRToAGPR) {
+    AllSGPRSpilledToVGPRs = true;
+
     // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
     // are spilled to VGPRs, in which case we can eliminate the stack usage.
     //
@@ -257,6 +275,18 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
         MachineInstr &MI = *I;
         Next = std::next(I);
 
+        if (SpillToAGPR && TII->isVGPRSpill(MI)) {
+          // Try to eliminate stack used by VGPR spills before frame
+          // finalization.
+          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+                                                     AMDGPU::OpName::vaddr);
+          int FI = MI.getOperand(FIOp).getIndex();
+          unsigned VReg = TII->getNamedOperand(MI, AMDGPU::OpName::vdata)
+            ->getReg();
+          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, TRI->isAGPR(MRI, VReg)))
+            TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
+        }
+
         if (!TII->isSGPRSpill(MI))
           continue;
 
@@ -266,18 +296,24 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
         bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
         (void)Spilled;
         assert(Spilled && "failed to spill SGPR to VGPR when allocated");
-      }
-
+      } else
+        AllSGPRSpilledToVGPRs = false;
     }
   }
 
   for (MachineBasicBlock &MBB : MF) {
     for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
      MBB.addLiveIn(SSpill.VGPR);
+
+    for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
+      MBB.addLiveIn(Reg);
+
+    for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
+      MBB.addLiveIn(Reg);
+
     MBB.sortUniqueLiveIns();
   }
 
-  FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
   MadeChange = true;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index a3f6caaacc8..46da974a2f4 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -319,7 +319,75 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
   return true;
 }
 
-void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
+/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
+/// Either AGPR is spilled to VGPR to vice versa.
+/// Returns true if a \p FI can be eliminated completely.
+bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
+                                                    int FI,
+                                                    bool isAGPRtoVGPR) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));
+
+  auto &Spill = VGPRToAGPRSpills[FI];
+
+  // This has already been allocated.
+  if (!Spill.Lanes.empty())
+    return Spill.FullyAllocated;
+
+  unsigned Size = FrameInfo.getObjectSize(FI);
+  unsigned NumLanes = Size / 4;
+  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);
+
+  const TargetRegisterClass &RC =
+      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
+  auto Regs = RC.getRegisters();
+
+  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  Spill.FullyAllocated = true;
+
+  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
+  // once.
+  BitVector OtherUsedRegs;
+  OtherUsedRegs.resize(TRI->getNumRegs());
+
+  const uint32_t *CSRMask =
+      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
+  if (CSRMask)
+    OtherUsedRegs.setBitsInMask(CSRMask);
+
+  // TODO: Should include register tuples, but doesn't matter with current
+  // usage.
+  for (MCPhysReg Reg : SpillAGPR)
+    OtherUsedRegs.set(Reg);
+  for (MCPhysReg Reg : SpillVGPR)
+    OtherUsedRegs.set(Reg);
+
+  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
+  for (unsigned I = 0; I < NumLanes; ++I) {
+    NextSpillReg = std::find_if(
+        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
+          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
+                 !OtherUsedRegs[Reg];
+        });
+
+    if (NextSpillReg == Regs.end()) { // Registers exhausted
+      Spill.FullyAllocated = false;
+      break;
+    }
+
+    OtherUsedRegs.set(*NextSpillReg);
+    SpillRegs.push_back(*NextSpillReg);
+    Spill.Lanes[I] = *NextSpillReg++;
+  }
+
+  return Spill.FullyAllocated;
+}
+
+void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
   // The FP spill hasn't been inserted yet, so keep it around.
   for (auto &R : SGPRToVGPRSpills) {
     if (R.first != FramePointerSaveIndex)
@@ -332,6 +400,11 @@ void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI)
       ++i)
     if (i != FramePointerSaveIndex)
       MFI.setStackID(i, TargetStackID::Default);
+
+  for (auto &R : VGPRToAGPRSpills) {
+    if (R.second.FullyAllocated)
+      MFI.RemoveStackObject(R.first);
+  }
 }
 
 MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index a8928dacf77..f19b20ceb5d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -442,6 +442,11 @@ public:
     SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
   };
 
+  struct VGPRSpillToAGPR {
+    SmallVector<MCPhysReg, 32> Lanes;
+    bool FullyAllocated = false;
+  };
+
   SparseBitVector<> WWMReservedRegs;
 
   void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }
@@ -456,6 +461,14 @@ private:
   unsigned NumVGPRSpillLanes = 0;
   SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
 
+  DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
+
+  // AGPRs used for VGPR spills.
+  SmallVector<MCPhysReg, 32> SpillAGPR;
+
+  // VGPRs used for AGPR spills.
+  SmallVector<MCPhysReg, 32> SpillVGPR;
+
 public: // FIXME
   /// If this is set, an SGPR used for save/restore of the register used for the
   /// frame pointer.
@@ -477,6 +490,20 @@ public:
     return SpillVGPRs;
   }
 
+  ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
+    return SpillAGPR;
+  }
+
+  ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
+    return SpillVGPR;
+  }
+
+  MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
+    auto I = VGPRToAGPRSpills.find(FrameIndex);
+    return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
+                                         : I->second.Lanes[Lane];
+  }
+
   AMDGPU::SIModeRegisterDefaults getMode() const {
     return Mode;
   }
@@ -484,7 +511,8 @@ public:
   bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
                                  unsigned NumLane) const;
   bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
-  void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);
+  bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
+  void removeDeadFrameIndices(MachineFrameInfo &MFI);
 
   bool hasCalculatedTID() const { return TIDReg != 0; };
   unsigned getTIDReg() const { return TIDReg; };
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 9fde16edade..7c2839ccb4c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -256,6 +256,13 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     reserveRegisterTuples(Reserved, Reg);
   }
 
+  // FIXME: Stop using reserved registers for this.
+  for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
+    reserveRegisterTuples(Reserved, Reg);
+
+  for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
+    reserveRegisterTuples(Reserved, Reg);
+
   return Reserved;
 }
 
@@ -448,10 +455,19 @@ const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
 
 static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   switch (Op) {
+  case AMDGPU::SI_SPILL_S1024_SAVE:
+  case AMDGPU::SI_SPILL_S1024_RESTORE:
+  case AMDGPU::SI_SPILL_V1024_SAVE:
+  case AMDGPU::SI_SPILL_V1024_RESTORE:
+  case AMDGPU::SI_SPILL_A1024_SAVE:
+  case AMDGPU::SI_SPILL_A1024_RESTORE:
+    return 32;
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_V512_SAVE:
   case AMDGPU::SI_SPILL_V512_RESTORE:
+  case AMDGPU::SI_SPILL_A512_SAVE:
+  case AMDGPU::SI_SPILL_A512_RESTORE:
     return 16;
   case AMDGPU::SI_SPILL_S256_SAVE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
@@ -467,6 +483,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   case AMDGPU::SI_SPILL_S128_RESTORE:
   case AMDGPU::SI_SPILL_V128_SAVE:
   case AMDGPU::SI_SPILL_V128_RESTORE:
+  case AMDGPU::SI_SPILL_A128_SAVE:
+  case AMDGPU::SI_SPILL_A128_RESTORE:
     return 4;
   case AMDGPU::SI_SPILL_S96_SAVE:
   case AMDGPU::SI_SPILL_S96_RESTORE:
@@ -477,11 +495,15 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
   case AMDGPU::SI_SPILL_S64_RESTORE:
   case AMDGPU::SI_SPILL_V64_SAVE:
   case AMDGPU::SI_SPILL_V64_RESTORE:
+  case AMDGPU::SI_SPILL_A64_SAVE:
+  case AMDGPU::SI_SPILL_A64_RESTORE:
     return 2;
   case AMDGPU::SI_SPILL_S32_SAVE:
   case AMDGPU::SI_SPILL_S32_RESTORE:
   case AMDGPU::SI_SPILL_V32_SAVE:
   case AMDGPU::SI_SPILL_V32_RESTORE:
+  case AMDGPU::SI_SPILL_A32_SAVE:
+  case AMDGPU::SI_SPILL_A32_RESTORE:
     return 1;
   default: llvm_unreachable("Invalid spill opcode");
   }
@@ -541,6 +563,35 @@ static int getOffsetMUBUFLoad(unsigned Opc) {
   }
 }
 
+static MachineInstrBuilder spillVGPRtoAGPR(MachineBasicBlock::iterator MI,
+                                           int Index,
+                                           unsigned Lane,
+                                           unsigned ValueReg,
+                                           bool IsKill) {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineFunction *MF = MI->getParent()->getParent();
+  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+
+  MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
+
+  if (Reg == AMDGPU::NoRegister)
+    return MachineInstrBuilder();
+
+  bool IsStore = MI->mayStore();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
+
+  unsigned Dst = IsStore ? Reg : ValueReg;
+  unsigned Src = IsStore ? ValueReg : Reg;
+  unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32
+                                                   : AMDGPU::V_ACCVGPR_READ_B32;
+
+  return BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
+           .addReg(Src, getKillRegState(IsKill));
+}
+
 // This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
 // need to handle the case where an SGPR may need to be spilled while spilling.
 static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
@@ -559,6 +610,9 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
     return false;
 
   const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
+  if (spillVGPRtoAGPR(MI, Index, 0, Reg->getReg(), false).getInstr())
+    return true;
+
   MachineInstrBuilder NewMI =
       BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
          .add(*Reg)
@@ -611,6 +665,10 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
   unsigned Align = MFI.getObjectAlignment(Index);
   const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
 
+  Register TmpReg =
+    hasAGPRs(RC) ? TII->getNamedOperand(*MI, AMDGPU::OpName::tmp)->getReg()
+                 : Register();
+
   assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
 
   if (!isUInt<12>(Offset + Size - EltSize)) {
@@ -659,21 +717,38 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
       SrcDstRegState |= getKillRegState(IsKill);
     }
 
-    MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
-    MachineMemOperand *NewMMO
-      = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
-                                 EltSize, MinAlign(Align, EltSize * i));
-
-    auto MIB = BuildMI(*MBB, MI, DL, Desc)
-      .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
-      .addReg(ScratchRsrcReg)
-      .addReg(SOffset, SOffsetRegState)
-      .addImm(Offset)
-      .addImm(0) // glc
-      .addImm(0) // slc
-      .addImm(0) // tfe
-      .addImm(0) // dlc
-      .addMemOperand(NewMMO);
+    auto MIB = spillVGPRtoAGPR(MI, Index, i, SubReg, IsKill);
+
+    if (!MIB.getInstr()) {
+      unsigned FinalReg = SubReg;
+      if (TmpReg != AMDGPU::NoRegister) {
+        if (IsStore)
+          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
+            .addReg(SubReg, getKillRegState(IsKill));
+        SubReg = TmpReg;
+      }
+
+      MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
+      MachineMemOperand *NewMMO
+        = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
+                                   EltSize, MinAlign(Align, EltSize * i));
+
+      MIB = BuildMI(*MBB, MI, DL, Desc)
+        .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
+        .addReg(ScratchRsrcReg)
+        .addReg(SOffset, SOffsetRegState)
+        .addImm(Offset)
+        .addImm(0) // glc
+        .addImm(0) // slc
+        .addImm(0) // tfe
+        .addImm(0) // dlc
+        .addMemOperand(NewMMO);
+
+      if (!IsStore && TmpReg != AMDGPU::NoRegister)
+        MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
+                      FinalReg)
+          .addReg(TmpReg, RegState::Kill);
+    }
 
     if (NumSubRegs > 1)
       MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
@@ -1038,6 +1113,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
   int FI,
   RegScavenger *RS) const {
   switch (MI->getOpcode()) {
+  case AMDGPU::SI_SPILL_S1024_SAVE:
   case AMDGPU::SI_SPILL_S512_SAVE:
   case AMDGPU::SI_SPILL_S256_SAVE:
   case AMDGPU::SI_SPILL_S160_SAVE:
@@ -1046,6 +1122,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
   case AMDGPU::SI_SPILL_S64_SAVE:
   case AMDGPU::SI_SPILL_S32_SAVE:
     return spillSGPR(MI, FI, RS, true);
+  case AMDGPU::SI_SPILL_S1024_RESTORE:
  case AMDGPU::SI_SPILL_S512_RESTORE:
   case AMDGPU::SI_SPILL_S256_RESTORE:
   case AMDGPU::SI_SPILL_S160_RESTORE:
@@ -1080,6 +1157,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
 
   switch (MI->getOpcode()) {
     // SGPR register spill
+    case AMDGPU::SI_SPILL_S1024_SAVE:
     case AMDGPU::SI_SPILL_S512_SAVE:
     case AMDGPU::SI_SPILL_S256_SAVE:
     case AMDGPU::SI_SPILL_S160_SAVE:
@@ -1092,6 +1170,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     }
 
     // SGPR register restore
+    case AMDGPU::SI_SPILL_S1024_RESTORE:
     case AMDGPU::SI_SPILL_S512_RESTORE:
     case AMDGPU::SI_SPILL_S256_RESTORE:
     case AMDGPU::SI_SPILL_S160_RESTORE:
@@ -1104,13 +1183,19 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     }
 
     // VGPR register spill
+    case AMDGPU::SI_SPILL_V1024_SAVE:
     case AMDGPU::SI_SPILL_V512_SAVE:
     case AMDGPU::SI_SPILL_V256_SAVE:
     case AMDGPU::SI_SPILL_V160_SAVE:
     case AMDGPU::SI_SPILL_V128_SAVE:
     case AMDGPU::SI_SPILL_V96_SAVE:
     case AMDGPU::SI_SPILL_V64_SAVE:
-    case AMDGPU::SI_SPILL_V32_SAVE: {
+    case AMDGPU::SI_SPILL_V32_SAVE:
+    case AMDGPU::SI_SPILL_A1024_SAVE:
+    case AMDGPU::SI_SPILL_A512_SAVE:
+    case AMDGPU::SI_SPILL_A128_SAVE:
+    case AMDGPU::SI_SPILL_A64_SAVE:
+    case AMDGPU::SI_SPILL_A32_SAVE: {
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);
      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
@@ -1134,7 +1219,13 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
     case AMDGPU::SI_SPILL_V128_RESTORE:
     case AMDGPU::SI_SPILL_V160_RESTORE:
     case AMDGPU::SI_SPILL_V256_RESTORE:
-    case AMDGPU::SI_SPILL_V512_RESTORE: {
+    case AMDGPU::SI_SPILL_V512_RESTORE:
+    case AMDGPU::SI_SPILL_V1024_RESTORE:
+    case AMDGPU::SI_SPILL_A32_RESTORE:
+    case AMDGPU::SI_SPILL_A64_RESTORE:
+    case AMDGPU::SI_SPILL_A128_RESTORE:
+    case AMDGPU::SI_SPILL_A512_RESTORE:
+    case AMDGPU::SI_SPILL_A1024_RESTORE: {
      const MachineOperand *VData = TII->getNamedOperand(*MI,
                                                         AMDGPU::OpName::vdata);
      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
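The lane mapping performed by allocateVGPRSpillToAGPR() above is a first-fit scan over the candidate registers of the opposite file. A rough standalone sketch of that loop, with plain ints standing in for registers and a single Used vector standing in for the MRI/CSR/OtherUsedRegs checks of the real function:

```cpp
// Illustrative sketch of the first-fit lane allocation in
// allocateVGPRSpillToAGPR() -- not LLVM code.
#include <algorithm>
#include <vector>

struct SpillMapping {
  std::vector<int> Lanes;      // one register per 4-byte lane, -1 if unmapped
  bool FullyAllocated = true;  // false => the stack slot cannot be removed
};

SpillMapping mapSpillSlot(unsigned SizeInBytes,
                          const std::vector<int> &Candidates,
                          std::vector<bool> &Used) {
  SpillMapping Spill;
  Spill.Lanes.assign(SizeInBytes / 4, -1);

  auto Next = Candidates.begin();
  for (int &Lane : Spill.Lanes) {
    // Advance to the next candidate that is still free.
    Next = std::find_if(Next, Candidates.end(),
                        [&Used](int Reg) { return !Used[Reg]; });
    if (Next == Candidates.end()) { // registers exhausted
      Spill.FullyAllocated = false;
      break;
    }
    Used[*Next] = true; // claim it so later lanes and slots skip it
    Lane = *Next++;
  }
  return Spill;
}

int main() {
  std::vector<bool> Used(8, false);
  Used[2] = true;                                 // e.g. a callee-saved register
  auto M = mapSpillSlot(16, {0, 1, 2, 3}, Used);  // 4 lanes
  // Lanes -> 0, 1, 3 and one unmapped lane; FullyAllocated == false.
  return M.FullyAllocated ? 0 : 1;
}
```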

