diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-08-02 01:52:45 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-08-02 01:52:45 +0000 |
| commit | 8e8f8f43b043b1839973fcc28694ca8d220a2137 (patch) | |
| tree | f7a857bfcd95c3eb2e3d235938ffdcf0df62a6ae /llvm/lib/Target | |
| parent | 1d6317c3ad5d16355f2a261ff8bdda78f76357b5 (diff) | |
| download | bcm5719-llvm-8e8f8f43b043b1839973fcc28694ca8d220a2137.tar.gz bcm5719-llvm-8e8f8f43b043b1839973fcc28694ca8d220a2137.zip | |
AMDGPU: Fix clobbering CSR VGPRs when spilling SGPR to it
llvm-svn: 309783
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 25 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 22 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 19 |
3 files changed, 60 insertions, 6 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 79bae0aa1f0..f7e5cb03b3e 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -454,6 +454,15 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, .addImm(NumBytes * ST.getWavefrontSize()) .setMIFlag(MachineInstr::FrameSetup); } + + for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg + : FuncInfo->getSGPRSpillVGPRs()) { + if (!Reg.FI.hasValue()) + continue; + TII->storeRegToStackSlot(MBB, MBBI, Reg.VGPR, true, + Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass, + &TII->getRegisterInfo()); + } } void SIFrameLowering::emitEpilogue(MachineFunction &MF, @@ -462,6 +471,19 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, if (FuncInfo->isEntryFunction()) return; + const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); + const SIInstrInfo *TII = ST.getInstrInfo(); + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + + for (const SIMachineFunctionInfo::SGPRSpillVGPRCSR &Reg + : FuncInfo->getSGPRSpillVGPRs()) { + if (!Reg.FI.hasValue()) + continue; + TII->loadRegFromStackSlot(MBB, MBBI, Reg.VGPR, + Reg.FI.getValue(), &AMDGPU::VGPR_32RegClass, + &TII->getRegisterInfo()); + } + unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); if (StackPtrReg == AMDGPU::NoRegister) return; @@ -469,9 +491,6 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF, const MachineFrameInfo &MFI = MF.getFrameInfo(); uint32_t NumBytes = MFI.getStackSize(); - const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); - const SIInstrInfo *TII = ST.getInstrInfo(); - MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); DebugLoc DL; // FIXME: Clarify distinction between no set SP and SP. For callee functions, diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 04e57bedb21..cfc9fe5fa51 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -237,6 +237,15 @@ unsigned SIMachineFunctionInfo::addImplicitBufferPtr(const SIRegisterInfo &TRI) return ImplicitBufferPtrUserSGPR; } +static bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg) { + for (unsigned I = 0; CSRegs[I]; ++I) { + if (CSRegs[I] == Reg) + return true; + } + + return false; +} + /// Reserve a slice of a VGPR to support spilling for FrameIndex \p FI. bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, int FI) { @@ -258,6 +267,8 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, int NumLanes = Size / 4; + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + // Make sure to handle the case where a wide SGPR spill may span between two // VGPRs. for (int I = 0; I < NumLanes; ++I, ++NumVGPRSpillLanes) { @@ -274,14 +285,21 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF, return false; } - SpillVGPRs.push_back(LaneVGPR); + Optional<int> CSRSpillFI; + if (FrameInfo.hasCalls() && CSRegs && isCalleeSavedReg(CSRegs, LaneVGPR)) { + // TODO: Should this be a CreateSpillStackObject? This is technically a + // weird CSR spill. + CSRSpillFI = FrameInfo.CreateStackObject(4, 4, false); + } + + SpillVGPRs.push_back(SGPRSpillVGPRCSR(LaneVGPR, CSRSpillFI)); // Add this register as live-in to all blocks to avoid machine verifer // complaining about use of an undefined physical register. for (MachineBasicBlock &BB : MF) BB.addLiveIn(LaneVGPR); } else { - LaneVGPR = SpillVGPRs.back(); + LaneVGPR = SpillVGPRs.back().VGPR; } SpillLanes.push_back(SpilledReg(LaneVGPR, VGPRIndex)); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 8511403ebc3..94145c46e10 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -211,6 +211,19 @@ public: bool hasReg() { return VGPR != AMDGPU::NoRegister;} }; + struct SGPRSpillVGPRCSR { + // VGPR used for SGPR spills + unsigned VGPR; + + // If the VGPR is a CSR, the stack slot used to save/restore it in the + // prolog/epilog. + Optional<int> FI; + + SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : + VGPR(V), + FI(F) {} + }; + private: // SGPR->VGPR spilling support. typedef std::pair<unsigned, unsigned> SpillRegMask; @@ -219,7 +232,7 @@ private: // frameindex key. DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills; unsigned NumVGPRSpillLanes = 0; - SmallVector<unsigned, 2> SpillVGPRs; + SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs; public: @@ -231,6 +244,10 @@ public: ArrayRef<SpilledReg>() : makeArrayRef(I->second); } + ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const { + return SpillVGPRs; + } + bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI); |

