summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Tim Renouf <tim.renouf@amd.com> 2017-09-29 09:49:35 +0000
committer: Tim Renouf <tim.renouf@amd.com> 2017-09-29 09:49:35 +0000
commit: 132291589f9d754cc32c5c2da60da5da61849470 (patch)
tree: d85eda353ab671d3182a4d387e7401f1a4fdd2a7 /llvm/lib
parent: 9f7ead33344baf87f546e6f90d13b97e291cee4b (diff)
download: bcm5719-llvm-132291589f9d754cc32c5c2da60da5da61849470.tar.gz
download: bcm5719-llvm-132291589f9d754cc32c5c2da60da5da61849470.zip
[AMDGPU] AMDPAL scratch buffer support
Summary: Added support for scratch (including spilling) for OS type amdpal: generates code to set up the scratch descriptor if it is needed. With amdpal, the scratch resource descriptor is loaded from offset 0 of the global information table. The low 32 bits of the address of the global information table are passed in s0. Added the amdgpu-git-ptr-high function attribute to hard-wire the high 32 bits of the address of the global information table. If the function attribute is not specified, or is 0xffffffff, then the backend generates code to use the high 32 bits of the PC. The documentation for the AMDPAL ABI will be added in a later commit. Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye Differential Revision: https://reviews.llvm.org/D37483 llvm-svn: 314501
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp 23
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp 61
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFrameLowering.h 6
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp 8
-rw-r--r-- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h 9
5 files changed, 95 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 0facae0992b..85b056e5c82 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -895,19 +895,24 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->EmitIntValue(RsrcReg, 4);
OutStreamer->EmitIntValue(S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks), 4);
+ unsigned Rsrc2Val = 0;
if (STM.isVGPRSpillingEnabled(*MF.getFunction())) {
OutStreamer->EmitIntValue(R_0286E8_SPI_TMPRING_SIZE, 4);
OutStreamer->EmitIntValue(S_0286E8_WAVESIZE(CurrentProgramInfo.ScratchBlocks), 4);
+ if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
+ Rsrc2Val = S_00B84C_SCRATCH_EN(CurrentProgramInfo.ScratchBlocks > 0);
+ }
+ if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) {
+ OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
+ OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
+ OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
+ OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
+ Rsrc2Val |= S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks);
+ }
+ if (Rsrc2Val) {
+ OutStreamer->EmitIntValue(RsrcReg + 4 /*rsrc2*/, 4);
+ OutStreamer->EmitIntValue(Rsrc2Val, 4);
}
- }
-
- if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) {
- OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
- OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks), 4);
- OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
- OutStreamer->EmitIntValue(MFI->getPSInputEnable(), 4);
- OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
- OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
}
OutStreamer->EmitIntValue(R_SPILLED_SGPRS, 4);
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index ff6fed88e37..37f5665be50 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -219,7 +219,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
// Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was
// specified.
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- auto AMDGPUASI = ST.getAMDGPUAS();
if (ST.debuggerEmitPrologue())
emitDebuggerPrologue(MF, MBB);
@@ -356,7 +355,65 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
.addReg(PreloadedPrivateBufferReg, RegState::Kill);
}
- if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) {
+ if (ResourceRegUsed)
+ emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
+ PreloadedPrivateBufferReg, ScratchRsrcReg);
+}
+
+// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
+void SIFrameLowering::emitEntryFunctionScratchSetup(const SISubtarget &ST,
+ MachineFunction &MF, MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
+ MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
+ unsigned ScratchRsrcReg) const {
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+ DebugLoc DL;
+ auto AMDGPUASI = ST.getAMDGPUAS();
+
+ if (ST.isAmdPalOS()) {
+ // The pointer to the GIT is formed from the offset passed in and either
+ // the amdgpu-git-ptr-high function attribute or the top part of the PC
+ unsigned RsrcLo = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ unsigned RsrcHi = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+ unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+
+ const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
+
+ if (MFI->getGITPtrHigh() != 0xffffffff) {
+ BuildMI(MBB, I, DL, SMovB32, RsrcHi)
+ .addImm(MFI->getGITPtrHigh())
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ } else {
+ const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
+ BuildMI(MBB, I, DL, GetPC64, Rsrc01);
+ }
+ BuildMI(MBB, I, DL, SMovB32, RsrcLo)
+ .addReg(AMDGPU::SGPR0) // Low address passed in
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ // We now have the GIT ptr - now get the scratch descriptor from the entry
+ // at offset 0.
+ PointerType *PtrTy =
+ PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
+ AMDGPUAS::CONSTANT_ADDRESS);
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
+ auto MMO = MF.getMachineMemOperand(PtrInfo,
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable,
+ 0, 0);
+ BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
+ .addReg(Rsrc01)
+ .addImm(0) // offset
+ .addImm(0) // glc
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
+ .addMemOperand(MMO);
+ return;
+ }
+ if (ST.isMesaGfxShader(MF)
+ || (PreloadedPrivateBufferReg == AMDGPU::NoRegister)) {
assert(!ST.isAmdCodeObjectV2(MF));
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index cc1c85ff6bf..df6f1632a31 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -69,6 +69,12 @@ private:
/// \brief Emits debugger prologue.
void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
+ void emitEntryFunctionScratchSetup(const SISubtarget &ST, MachineFunction &MF,
+ MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI,
+ MachineBasicBlock::iterator I, unsigned PreloadedPrivateBufferReg,
+ unsigned ScratchRsrcReg) const;
+
public:
bool hasFP(const MachineFunction &MF) const override;
bool hasSP(const MachineFunction &MF) const;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index ebb83fea1fd..0a92cd17654 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -48,7 +48,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkItemIDY(false),
WorkItemIDZ(false),
ImplicitBufferPtr(false),
- ImplicitArgPtr(false) {
+ ImplicitArgPtr(false),
+ GITPtrHigh(0xffffffff) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
@@ -160,6 +161,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (HasStackObjects || F->hasFnAttribute("amdgpu-flat-scratch"))
FlatScratchInit = true;
}
+
+ Attribute A = F->getFnAttribute("amdgpu-git-ptr-high");
+ StringRef S = A.getValueAsString();
+ if (!S.empty())
+ S.consumeInteger(0, GITPtrHigh);
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 242b41a5908..ade909cc84e 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -185,6 +185,11 @@ private:
// user arguments. This is an offset from the KernargSegmentPtr.
bool ImplicitArgPtr : 1;
+ // The hard-wired high half of the address of the global information table
+ // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
+ // current hardware only allows a 16 bit value.
+ unsigned GITPtrHigh;
+
MCPhysReg getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -406,6 +411,10 @@ public:
return ArgInfo.getPreloadedValue(Value).first->getRegister();
}
+ unsigned getGITPtrHigh() const {
+ return GITPtrHigh;
+ }
+
unsigned getNumUserSGPRs() const {
return NumUserSGPRs;
}
OpenPOWER on IntegriCloud