summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2015-11-30 21:15:53 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2015-11-30 21:15:53 +0000
commit0e3d38937e11408127c1ae07d89f5189df204405 (patch)
treecd773950ee36f9811b2a4c50a390a46398f26272 /llvm/lib
parentff6da2fe894f52abcddd7bfbf2a211d51867eb88 (diff)
downloadbcm5719-llvm-0e3d38937e11408127c1ae07d89f5189df204405.tar.gz
bcm5719-llvm-0e3d38937e11408127c1ae07d89f5189df204405.zip
AMDGPU: Remove SIPrepareScratchRegs
It does not work because of emergency stack slots. This pass was supposed to eliminate the dummy registers on the spill instructions, but the register scavenger can introduce more spill instructions during PrologEpilogInserter, so some dummy registers would be left behind whenever those extra spills were needed.

The potential for spilling the scratch resource descriptor and offset register makes doing something like this overly complicated. Instead, reserve registers for the resource descriptor and use them directly in eliminateFrameIndex.

This also stops creating another scratch resource descriptor when directly selecting scratch MUBUF instructions.

The choice of which registers are reserved is temporary. For now it attempts to pick the next available registers after the user and system SGPRs.

llvm-svn: 254329
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.h1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp32
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.cpp72
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp17
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h4
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp18
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp196
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp19
12 files changed, 123 insertions, 249 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 80766086e15..a620e85101e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -48,7 +48,6 @@ FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSIFixSGPRLiveRangesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
-FunctionPass *createSIPrepareScratchRegs();
ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 04a0c1d06af..85a06882ffe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1064,34 +1064,12 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
MachineFunction &MF = CurDAG->getMachineFunction();
const SIRegisterInfo *TRI =
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
-
- unsigned ScratchOffsetReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
- Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
- ScratchOffsetReg, MVT::i32);
- SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
- SDValue ScratchRsrcDword0 =
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
-
- SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
- SDValue ScratchRsrcDword1 =
- SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const SDValue RsrcOps[] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
- ScratchRsrcDword0,
- CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
- ScratchRsrcDword1,
- CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
- };
- SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
- MVT::v2i32, RsrcOps), 0);
- Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
- SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
- MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
+ unsigned ScratchOffsetReg
+ = TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
+ Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
+ SOffset = CurDAG->getRegister(ScratchOffsetReg, MVT::i32);
// (add n0, c1)
if (CurDAG->isBaseWithConstantOffset(Addr)) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4e31c7ab4d4..7b0445db4df 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -327,7 +327,6 @@ void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
}
void GCNPassConfig::addPostRegAlloc() {
- addPass(createSIPrepareScratchRegs(), false);
addPass(createSIShrinkInstructionsPass(), false);
}
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index 7a4b5bb6d35..64c9e1882e4 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -57,7 +57,6 @@ add_llvm_target(AMDGPUCodeGen
SILowerControlFlow.cpp
SILowerI1Copies.cpp
SIMachineFunctionInfo.cpp
- SIPrepareScratchRegs.cpp
SIRegisterInfo.cpp
SIShrinkInstructions.cpp
SITypeRewriter.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index a2d8fa1b0a1..6aff4b5700d 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -8,17 +8,89 @@
//==-----------------------------------------------------------------------===//
#include "SIFrameLowering.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/RegisterScavenging.h"
using namespace llvm;
+
+static bool hasOnlySGPRSpills(const SIMachineFunctionInfo *FuncInfo,
+ const MachineFrameInfo *FrameInfo) {
+ if (!FuncInfo->hasSpilledSGPRs())
+ return false;
+
+ if (FuncInfo->hasSpilledVGPRs())
+ return false;
+
+ for (int I = FrameInfo->getObjectIndexBegin(),
+ E = FrameInfo->getObjectIndexEnd(); I != E; ++I) {
+ if (!FrameInfo->isSpillSlotObjectIndex(I))
+ return false;
+ }
+
+ return true;
+}
+
+void SIFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ if (!MF.getFrameInfo()->hasStackObjects())
+ return;
+
+ assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
+
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ // If we only have SGPR spills, we won't actually be using scratch memory
+ // since these spill to VGPRs.
+ //
+ // FIXME: We should be cleaning up these unused SGPR spill frame indices
+ // somewhere.
+ if (hasOnlySGPRSpills(MFI, MF.getFrameInfo()))
+ return;
+
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+
+ // We need to insert initialization of the scratch resource descriptor.
+ unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
+ assert(ScratchRsrcReg != AMDGPU::NoRegister);
+
+ uint64_t Rsrc23 = TII->getScratchRsrcWords23();
+ MachineBasicBlock::iterator I = MBB.begin();
+ DebugLoc DL;
+
+ unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+ unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+ unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD0");
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD1");
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
+ .addImm(Rsrc23 & 0xffffffff);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
+ .addImm(Rsrc23 >> 32);
+}
+
void SIFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ if (!MFI->hasStackObjects())
+ return;
+
bool MayNeedScavengingEmergencySlot = MFI->hasStackObjects();
assert((RS || !MayNeedScavengingEmergencySlot) &&
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 677128d6ce0..a9152fd8b2a 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -21,6 +21,9 @@ public:
AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
~SIFrameLowering() override {}
+ void emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override;
+
void processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 94fad32c007..51cbc95bc07 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -552,6 +552,7 @@ SDValue SITargetLowering::LowerFormalArguments(
MachineFunction &MF = DAG.getMachineFunction();
FunctionType *FType = MF.getFunction()->getFunctionType();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (Subtarget->isAmdHsaOS() && Info->getShaderType() != ShaderType::COMPUTE) {
const Function *Fn = MF.getFunction();
@@ -622,9 +623,9 @@ SDValue SITargetLowering::LowerFormalArguments(
// The pointer to the scratch buffer is stored in SGPR2, SGPR3
if (Info->getShaderType() == ShaderType::COMPUTE) {
if (Subtarget->isAmdHsaOS())
- Info->NumUserSGPRs = 2; // FIXME: Need to support scratch buffers.
+ Info->NumUserSGPRs += 4; // FIXME: Need to support scratch buffers.
else
- Info->NumUserSGPRs = 4;
+ Info->NumUserSGPRs += 4;
unsigned InputPtrReg =
TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR);
@@ -750,6 +751,9 @@ SDValue SITargetLowering::LowerFormalArguments(
Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
}
+ if (MF.getFrameInfo()->hasStackObjects() || ST.isVGPRSpillingEnabled(Info))
+ Info->setScratchRSrcReg(TRI);
+
if (Chains.empty())
return Chain;
@@ -2335,15 +2339,6 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
}
-MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
- SDLoc DL,
- SDValue Ptr) const {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
-
- return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23());
-}
-
SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const {
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index a358e3fc3c0..0659dd7d5d0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -116,10 +116,6 @@ public:
SDValue Ptr,
uint32_t RsrcDword1,
uint64_t RsrcDword2And3) const;
- MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
- SDLoc DL,
- SDValue Ptr) const;
-
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9a85a1d515f..b7d2a471275 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -551,15 +551,16 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
+ unsigned ScratchOffsetPreloadReg
+ = RI.getPreloadedValue(*MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
+
unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize());
MFI->setHasSpilledVGPRs();
BuildMI(MBB, MI, DL, get(Opcode))
.addReg(SrcReg) // src
.addFrameIndex(FrameIndex) // frame_idx
- // Place-holder registers, these will be filled in by
- // SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef)
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(ScratchOffsetPreloadReg) // scratch_offset
.addMemOperand(MMO);
}
@@ -637,13 +638,14 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");
+ unsigned ScratchOffsetPreloadReg
+ = RI.getPreloadedValue(*MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
+
unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addFrameIndex(FrameIndex) // frame_idx
- // Place-holder registers, these will be filled in by
- // SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef)
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(ScratchOffsetPreloadReg) // scratch_offset
.addMemOperand(MMO);
}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 6269dce553f..d042844aa13 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -68,6 +68,14 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkItemIDZ = true;
}
+void SIMachineFunctionInfo::setScratchRSrcReg(const SIRegisterInfo *TRI) {
+ // We need to round up to next multiple of 4.
+ unsigned NextSReg128 = RoundUpToAlignment(NumUserSGPRs + 5, 4);
+ unsigned RegSub0 = AMDGPU::SReg_32RegClass.getRegister(NextSReg128);
+ ScratchRSrcReg = TRI->getMatchingSuperReg(RegSub0, AMDGPU::sub0,
+ &AMDGPU::SReg_128RegClass);
+}
+
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
MachineFunction *MF,
unsigned FrameIndex,
diff --git a/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
deleted file mode 100644
index a6c22775e09..00000000000
--- a/llvm/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-//===-- SIPrepareScratchRegs.cpp - Use predicates for control flow --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-///
-/// This pass loads scratch pointer and scratch offset into a register or a
-/// frame index which can be used anywhere in the program. These values will
-/// be used for spilling VGPRs.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "SIDefines.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
-
-using namespace llvm;
-
-namespace {
-
-class SIPrepareScratchRegs : public MachineFunctionPass {
-
-private:
- static char ID;
-
-public:
- SIPrepareScratchRegs() : MachineFunctionPass(ID) { }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "SI prepare scratch registers";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-};
-
-} // End anonymous namespace
-
-char SIPrepareScratchRegs::ID = 0;
-
-FunctionPass *llvm::createSIPrepareScratchRegs() {
- return new SIPrepareScratchRegs();
-}
-
-bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
- const SIRegisterInfo *TRI = &TII->getRegisterInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- MachineBasicBlock *Entry = &MF.front();
- MachineBasicBlock::iterator I = Entry->begin();
- DebugLoc DL = I->getDebugLoc();
-
- // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to
- // run this pass.
- if (!MFI->hasSpilledVGPRs())
- return false;
-
- unsigned ScratchPtrPreloadReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
- unsigned ScratchOffsetPreloadReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
-
- if (!Entry->isLiveIn(ScratchPtrPreloadReg))
- Entry->addLiveIn(ScratchPtrPreloadReg);
-
- if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
- Entry->addLiveIn(ScratchOffsetPreloadReg);
-
- // Load the scratch offset.
- unsigned ScratchOffsetReg =
- TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
- int ScratchOffsetFI = -1;
-
- if (ScratchOffsetReg != AMDGPU::NoRegister) {
- // Found an SGPR to use
- BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg)
- .addReg(ScratchOffsetPreloadReg);
- } else {
- // No SGPR is available, we must spill.
- ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4);
- BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
- .addReg(ScratchOffsetPreloadReg)
- .addFrameIndex(ScratchOffsetFI)
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- }
-
-
- // Now that we have the scratch pointer and offset values, we need to
- // add them to all the SI_SPILL_V* instructions.
-
- RegScavenger RS;
- unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
- RS.addScavengingFrameIndex(ScratchRsrcFI);
-
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
- // Add the scratch offset reg as a live-in so that the register scavenger
- // doesn't re-use it.
- if (!MBB.isLiveIn(ScratchOffsetReg) &&
- ScratchOffsetReg != AMDGPU::NoRegister)
- MBB.addLiveIn(ScratchOffsetReg);
- RS.enterBasicBlock(&MBB);
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
- RS.forward(I);
- DebugLoc DL = MI.getDebugLoc();
- if (!TII->isVGPRSpill(MI))
- continue;
-
- // Scratch resource
- unsigned ScratchRsrcReg =
- RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
-
- uint64_t Rsrc23 = TII->getScratchRsrcWords23();
-
- unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
- unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
- unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
- unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
- .addExternalSymbol("SCRATCH_RSRC_DWORD0")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
- .addExternalSymbol("SCRATCH_RSRC_DWORD1")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
- .addImm(Rsrc23 & 0xffffffff)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
- .addImm(Rsrc23 >> 32)
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
-
- // Scratch Offset
- if (ScratchOffsetReg == AMDGPU::NoRegister) {
- ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
- ScratchOffsetReg)
- .addFrameIndex(ScratchOffsetFI)
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
- .addReg(AMDGPU::SGPR0, RegState::Undef);
- } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
- MBB.addLiveIn(ScratchOffsetReg);
- }
-
- if (ScratchRsrcReg == AMDGPU::NoRegister ||
- ScratchOffsetReg == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF.getFunction()->getContext();
- Ctx.emitError("ran out of SGPRs for spilling VGPRs");
- ScratchRsrcReg = AMDGPU::SGPR0;
- ScratchOffsetReg = AMDGPU::SGPR0;
- }
- MI.getOperand(2).setReg(ScratchRsrcReg);
- MI.getOperand(2).setIsKill(true);
- MI.getOperand(2).setIsUndef(false);
- MI.getOperand(3).setReg(ScratchOffsetReg);
- MI.getOperand(3).setIsUndef(false);
- MI.getOperand(3).setIsKill(false);
- MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
- MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
- }
- }
- return true;
-}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index ab7539b6fb3..b392c86fa2e 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -68,6 +68,22 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
+ if (ScratchRSrcReg != AMDGPU::NoRegister) {
+ unsigned ScratchOffsetPreloadReg
+ = getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
+ // We will need to use this user SGPR argument for spilling, and thus never
+ // want it to be spilled.
+ reserveRegisterTuples(Reserved, ScratchOffsetPreloadReg);
+
+ // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
+ // to spill.
+ // TODO: May need to reserve a VGPR if doing LDS spilling.
+ reserveRegisterTuples(Reserved, ScratchRSrcReg);
+ assert(!isSubRegister(ScratchRSrcReg, ScratchOffsetPreloadReg));
+ }
+
return Reserved;
}
@@ -243,6 +259,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
.addReg(SubReg)
.addImm(Spill.Lane);
+ // FIXME: Since this spills to another register instead of an actual
+ // frame index, we should delete the frame index when all references to
+ // it are fixed.
}
MI->eraseFromParent();
break;
OpenPOWER on IntegriCloud