diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 23 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 61 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.h | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 9 |
5 files changed, 95 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0facae0992b..85b056e5c82 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -895,19 +895,24 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF, OutStreamer->EmitIntValue(RsrcReg, 4); OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) | S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4); + unsigned Rsrc2Val = 0; if (STM.isVGPRSpillingEnabled(*MF.getFunction())) { OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4); OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4); + if (TM.getTargetTriple().getOS() == Triple::AMDPAL) + Rsrc2Val = S_00B84C_SCRATCH_EN(CurrentProgramInfo.ScratchBlocks > 0); + } + if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) { + OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); + OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4); + OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4); + OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4); + Rsrc2Val |= S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks); + } + if (Rsrc2Val) { + OutStreamer->EmitIntValue(RsrcReg + 4 /*rsrc2*/, 4); + OutStreamer->EmitIntValue(Rsrc2Val, 4); } - } - - if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) { - OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4); - OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks), 4); - OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4); - OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4); - OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4); - OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4); } OutStreamer->EmitIntValue(R_SPILLED_SGPRS, 4); diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index ff6fed88e37..37f5665be50 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -219,7 +219,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was // specified. const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); - auto AMDGPUASI = ST.getAMDGPUAS(); if (ST.debuggerEmitPrologue()) emitDebuggerPrologue(MF, MBB); @@ -356,7 +355,65 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, .addReg(PreloadedPrivateBufferReg, RegState::Kill); } - if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) { + if (ResourceRegUsed) + emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I, + PreloadedPrivateBufferReg, ScratchRsrcReg); +} + +// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set. +void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST, + MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI, + MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg, + unsigned ScratchRsrcReg) const { + + const SIInstrInfo *TII = ST.getInstrInfo(); + const SIRegisterInfo *TRI = &TII->getRegisterInfo(); + DebugLoc DL; + auto AMDGPUASI = ST.getAMDGPUAS(); + + if (ST.isAmdPalOS()) { + // The pointer to the GIT is formed from the offset passed in and either + // the amdgpu-git-ptr-high function attribute or the top part of the PC + unsigned RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + + const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); + + if (MFI->getGITPtrHigh() != 0xffffffff) { + BuildMI(MBB, I, DL, SMovB32, RsrcHi) + .addImm(MFI->getGITPtrHigh()) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + } else { + const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64); + BuildMI(MBB, I, DL, GetPC64, Rsrc01); + } + BuildMI(MBB, I, DL, SMovB32, RsrcLo) + .addReg(AMDGPU::SGPR0) // Low address passed in + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + // We now have the GIT ptr - now get the scratch descriptor from the entry + // at offset 0. + PointerType *PtrTy = + PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()), + AMDGPUAS::CONSTANT_ADDRESS); + MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); + const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM); + auto MMO = MF.getMachineMemOperand(PtrInfo, + MachineMemOperand::MOLoad | + MachineMemOperand::MOInvariant | + MachineMemOperand::MODereferenceable, + 0, 0); + BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg) + .addReg(Rsrc01) + .addImm(0) // offset + .addImm(0) // glc + .addReg(ScratchRsrcReg, RegState::ImplicitDefine) + .addMemOperand(MMO); + return; + } + if (ST.isMesaGfxShader(MF) + || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) { assert(!ST.isAmdCodeObjectV2(MF)); const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32); diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index cc1c85ff6bf..df6f1632a31 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -69,6 +69,12 @@ private: /// \brief Emits debugger prologue. void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const; + // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set. + void emitEntryFunctionScratchSetup(const SISubtarget &ST, MachineFunction &MF, + MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI, + MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg, + unsigned ScratchRsrcReg) const; + public: bool hasFP(const MachineFunction &MF) const override; bool hasSP(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index ebb83fea1fd..0a92cd17654 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -48,7 +48,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) WorkItemIDY(false), WorkItemIDZ(false), ImplicitBufferPtr(false), - ImplicitArgPtr(false) { + ImplicitArgPtr(false), + GITPtrHigh(0xffffffff) { const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); const Function *F = MF.getFunction(); FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F); @@ -160,6 +161,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch")) FlatScratchInit = true; } + + Attribute A = F->getFnAttribute("amdgpu-git-ptr-high"); + StringRef S = A.getValueAsString(); + if (!S.empty()) + S.consumeInteger(0, GITPtrHigh); } unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 242b41a5908..ade909cc84e 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -185,6 +185,11 @@ private: // user arguments. This is an offset from the KernargSegmentPtr. bool ImplicitArgPtr : 1; + // The hard-wired high half of the address of the global information table + // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since + // current hardware only allows a 16 bit value. + unsigned GITPtrHigh; + MCPhysReg getNextUserSGPR() const { assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); return AMDGPU::SGPR0 + NumUserSGPRs; @@ -406,6 +411,10 @@ public: return ArgInfo.getPreloadedValue(Value).first->getRegister(); } + unsigned getGITPtrHigh() const { + return GITPtrHigh; + } + unsigned getNumUserSGPRs() const { return NumUserSGPRs; } |

