summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFrameLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.cpp175
1 files changed, 85 insertions, 90 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 4b2124b14c0..7f3150bdd01 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -164,34 +164,29 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
return ScratchRsrcReg;
}
-// Shift down registers reserved for the scratch wave offset and stack pointer
-// SGPRs.
-std::pair<unsigned, unsigned>
-SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
- const GCNSubtarget &ST,
- const SIInstrInfo *TII,
- const SIRegisterInfo *TRI,
- SIMachineFunctionInfo *MFI,
- MachineFunction &MF) const {
+// Shift down registers reserved for the scratch wave offset.
+unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
+ const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI,
+ SIMachineFunctionInfo *MFI, MachineFunction &MF) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
+ assert(MFI->isEntryFunction());
+
// No replacement necessary.
if (ScratchWaveOffsetReg == AMDGPU::NoRegister ||
- !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) {
- assert(MFI->getStackPtrOffsetReg() == AMDGPU::SP_REG);
- return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister);
+ (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) {
+ return AMDGPU::NoRegister;
}
- unsigned SPReg = MFI->getStackPtrOffsetReg();
if (ST.hasSGPRInitBug())
- return std::make_pair(ScratchWaveOffsetReg, SPReg);
+ return ScratchWaveOffsetReg;
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);
if (NumPreloaded > AllSGPRs.size())
- return std::make_pair(ScratchWaveOffsetReg, SPReg);
+ return ScratchWaveOffsetReg;
AllSGPRs = AllSGPRs.slice(NumPreloaded);
@@ -212,7 +207,7 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
unsigned ReservedRegCount = 13;
if (AllSGPRs.size() < ReservedRegCount)
- return std::make_pair(ScratchWaveOffsetReg, SPReg);
+ return ScratchWaveOffsetReg;
bool HandledScratchWaveOffsetReg =
ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF);
@@ -225,14 +220,20 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
HandledScratchWaveOffsetReg = true;
MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+ if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) {
+ assert(!hasFP(MF));
+ MFI->setStackPtrOffsetReg(Reg);
+ }
+
MFI->setScratchWaveOffsetReg(Reg);
+ MFI->setFrameOffsetReg(Reg);
ScratchWaveOffsetReg = Reg;
break;
}
}
}
- return std::make_pair(ScratchWaveOffsetReg, SPReg);
+ return ScratchWaveOffsetReg;
}
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
@@ -265,38 +266,11 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
if (MFI->hasFlatScratchInit())
emitFlatScratchInit(ST, MF, MBB);
- unsigned SPReg = MFI->getStackPtrOffsetReg();
- if (SPReg != AMDGPU::SP_REG) {
- assert(MRI.isReserved(SPReg) && "SPReg used but not reserved");
-
- DebugLoc DL;
- const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- int64_t StackSize = FrameInfo.getStackSize();
-
- if (StackSize == 0) {
- BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg)
- .addReg(MFI->getScratchWaveOffsetReg());
- } else {
- BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
- .addReg(MFI->getScratchWaveOffsetReg())
- .addImm(StackSize * ST.getWavefrontSize());
- }
- }
-
unsigned ScratchRsrcReg
= getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
- unsigned ScratchWaveOffsetReg;
- std::tie(ScratchWaveOffsetReg, SPReg)
- = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
-
- // It's possible to have uses of only ScratchWaveOffsetReg without
- // ScratchRsrcReg if it's only used for the initialization of flat_scratch,
- // but the inverse is not true.
- if (ScratchWaveOffsetReg == AMDGPU::NoRegister) {
- assert(ScratchRsrcReg == AMDGPU::NoRegister);
- return;
- }
+ unsigned ScratchWaveOffsetReg =
+ getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
// We need to insert initialization of the scratch resource descriptor.
unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
@@ -308,18 +282,19 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
}
- bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg);
+ bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister &&
+ MRI.isPhysRegUsed(ScratchWaveOffsetReg);
bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&
MRI.isPhysRegUsed(ScratchRsrcReg);
+ // FIXME: Hack to not crash in situations which emitted an error.
+ if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister)
+ return;
+
// We added live-ins during argument lowering, but since they were not used
// they were deleted. We're adding the uses now, so add them back.
- if (OffsetRegUsed) {
- assert(PreloadedScratchWaveOffsetReg != AMDGPU::NoRegister &&
- "scratch wave offset input is required");
- MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
- MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
- }
+ MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
+ MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F));
@@ -360,11 +335,16 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
.addReg(PreloadedPrivateBufferReg, RegState::Kill);
}
- if (OffsetRegUsed &&
- PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
+ unsigned SPReg = MFI->getStackPtrOffsetReg();
+ assert(SPReg != AMDGPU::SP_REG);
+
+ // FIXME: Remove the isPhysRegUsed checks
+ const bool HasFP = hasFP(MF);
+
+ if (HasFP || OffsetRegUsed) {
+ assert(ScratchWaveOffsetReg);
BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
- .addReg(PreloadedScratchWaveOffsetReg,
- MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill);
+ .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);
}
if (CopyBuffer && !CopyBufferFirst) {
@@ -372,9 +352,26 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
.addReg(PreloadedPrivateBufferReg, RegState::Kill);
}
- if (ResourceRegUsed)
+ if (ResourceRegUsed) {
emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,
PreloadedPrivateBufferReg, ScratchRsrcReg);
+ }
+
+ if (HasFP) {
+ DebugLoc DL;
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ int64_t StackSize = FrameInfo.getStackSize();
+
+ // On kernel entry, the private scratch wave offset is the SP value.
+ if (StackSize == 0) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg)
+ .addReg(MFI->getScratchWaveOffsetReg());
+ } else {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg)
+ .addReg(MFI->getScratchWaveOffsetReg())
+ .addImm(StackSize * ST.getWavefrontSize());
+ }
+ }
}
// Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.
@@ -567,15 +564,12 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc DL;
- // XXX - Is this the right predicate?
-
- bool NeedFP = hasFP(MF);
+ bool HasFP = false;
uint32_t NumBytes = MFI.getStackSize();
uint32_t RoundedSize = NumBytes;
- const bool NeedsRealignment = TRI.needsStackRealignment(MF);
- if (NeedsRealignment) {
- assert(NeedFP);
+ if (TRI.needsStackRealignment(MF)) {
+ HasFP = true;
const unsigned Alignment = MFI.getMaxAlignment();
RoundedSize += Alignment;
@@ -599,7 +593,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.addImm(-Alignment * ST.getWavefrontSize())
.setMIFlag(MachineInstr::FrameSetup);
FuncInfo->setIsStackRealigned(true);
- } else if (NeedFP) {
+ } else if ((HasFP = hasFP(MF))) {
// If we need a base pointer, set it up here. It's whatever the value of
// the stack pointer is at this point. Any variable size objects will be
// allocated after this, so we can still use the base pointer to reference
@@ -609,7 +603,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
- if (RoundedSize != 0 && hasSP(MF)) {
+ if (HasFP && RoundedSize != 0) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)
.addReg(StackPtrReg)
.addImm(RoundedSize * ST.getWavefrontSize())
@@ -693,23 +687,17 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(ScratchExecCopy);
}
- unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
- if (StackPtrReg == AMDGPU::NoRegister)
- return;
-
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- uint32_t NumBytes = MFI.getStackSize();
-
- // FIXME: Clarify distinction between no set SP and SP. For callee functions,
- // it's really whether we need SP to be accurate or not.
-
- if (NumBytes != 0 && hasSP(MF)) {
+ if (hasFP(MF)) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ uint32_t NumBytes = MFI.getStackSize();
uint32_t RoundedSize = FuncInfo->isStackRealigned() ?
NumBytes + MFI.getMaxAlignment() : NumBytes;
+ const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)
.addReg(StackPtrReg)
- .addImm(RoundedSize * ST.getWavefrontSize());
+ .addImm(RoundedSize * ST.getWavefrontSize())
+ .setMIFlag(MachineInstr::FrameDestroy);
}
}
@@ -849,18 +837,25 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
}
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
- // All stack operations are relative to the frame offset SGPR.
- // TODO: Still want to eliminate sometimes.
const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.hasCalls()) {
+ // All offsets are unsigned, so need to be addressed in the same direction
+ // as stack growth.
+ if (MFI.getStackSize() != 0)
+ return true;
+
+ // For the entry point, the input wave scratch offset must be copied to the
+ // API SP if there are calls.
+ if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction())
+ return true;
+
+ // Retain behavior of always omitting the FP for leaf functions when
+ // possible.
+ if (MF.getTarget().Options.DisableFramePointerElim(MF))
+ return true;
+ }
- // XXX - Is this only called after frame is finalized? Should be able to check
- // frame size.
- return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI);
-}
-
-bool SIFrameLowering::hasSP(const MachineFunction &MF) const {
- const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
- // All stack operations are relative to the frame offset SGPR.
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- return MFI.hasCalls() || MFI.hasVarSizedObjects() || TRI->needsStackRealignment(MF);
+ return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
+ MFI.hasStackMap() || MFI.hasPatchPoint() ||
+ MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF);
}
OpenPOWER on IntegriCloud