path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 142
1 file changed, 74 insertions(+), 68 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 1ca11da247e..8a08bc463da 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1770,6 +1770,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
// should reserve the arguments and use them directly.
MachineFrameInfo &MFI = MF.getFrameInfo();
bool HasStackObjects = MFI.hasStackObjects();
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
// Record that we know we have non-spill stack objects so we don't need to
// check all stack objects later.
@@ -1785,65 +1786,85 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
// the scratch registers to pass in.
bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- if (ST.isAmdHsaOrMesa(MF.getFunction())) {
- if (RequiresStackAccess) {
- // If we have stack objects, we unquestionably need the private buffer
- // resource. For the Code Object V2 ABI, this will be the first 4 user
- // SGPR inputs. We can reserve those and use them directly.
-
- unsigned PrivateSegmentBufferReg = Info.getPreloadedReg(
- AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
- Info.setScratchRSrcReg(PrivateSegmentBufferReg);
-
- if (MFI.hasCalls()) {
- // If we have calls, we need to keep the frame register in a register
- // that won't be clobbered by a call, so ensure it is copied somewhere.
-
- // This is not a problem for the scratch wave offset, because the same
- // registers are reserved in all functions.
-
- // FIXME: Nothing is really ensuring this is a call preserved register,
- // it's just selected from the end so it happens to be.
- unsigned ReservedOffsetReg
- = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
- Info.setScratchWaveOffsetReg(ReservedOffsetReg);
- } else {
- unsigned PrivateSegmentWaveByteOffsetReg = Info.getPreloadedReg(
- AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- Info.setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg);
- }
- } else {
- unsigned ReservedBufferReg
- = TRI.reservedPrivateSegmentBufferReg(MF);
- unsigned ReservedOffsetReg
- = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
-
- // We tentatively reserve the last registers (skipping the last two
- // which may contain VCC). After register allocation, we'll replace
- // these with the ones immediately after those which were really
- // allocated. In the prologue copies will be inserted from the argument
- // to these reserved registers.
- Info.setScratchRSrcReg(ReservedBufferReg);
- Info.setScratchWaveOffsetReg(ReservedOffsetReg);
- }
+ if (RequiresStackAccess && ST.isAmdHsaOrMesa(MF.getFunction())) {
+ // If we have stack objects, we unquestionably need the private buffer
+ // resource. For the Code Object V2 ABI, this will be the first 4 user
+ // SGPR inputs. We can reserve those and use them directly.
+
+ unsigned PrivateSegmentBufferReg =
+ Info.getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
+ Info.setScratchRSrcReg(PrivateSegmentBufferReg);
} else {
unsigned ReservedBufferReg = TRI.reservedPrivateSegmentBufferReg(MF);
+ // We tentatively reserve the last registers (skipping the last registers
+ // which may contain VCC, FLAT_SCR, and XNACK). After register allocation,
+ // we'll replace these with the ones immediately after those which were
+ // really allocated. In the prologue copies will be inserted from the
+ // argument to these reserved registers.
// Without HSA, relocations are used for the scratch pointer and the
// buffer resource setup is always inserted in the prologue. Scratch wave
// offset is still in an input SGPR.
Info.setScratchRSrcReg(ReservedBufferReg);
+ }
- if (HasStackObjects && !MFI.hasCalls()) {
- unsigned ScratchWaveOffsetReg = Info.getPreloadedReg(
- AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- Info.setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+ // This should be accurate for kernels even before the frame is finalized.
+ const bool HasFP = ST.getFrameLowering()->hasFP(MF);
+ if (HasFP) {
+ unsigned ReservedOffsetReg =
+ TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Try to use s32 as the SP, but move it if it would interfere with input
+ // arguments. This won't work with calls though.
+ //
+ // FIXME: Move SP to avoid any possible inputs, or find a way to spill input
+ // registers.
+ if (!MRI.isLiveIn(AMDGPU::SGPR32)) {
+ Info.setStackPtrOffsetReg(AMDGPU::SGPR32);
} else {
- unsigned ReservedOffsetReg
- = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
- Info.setScratchWaveOffsetReg(ReservedOffsetReg);
+ assert(AMDGPU::isShader(MF.getFunction().getCallingConv()));
+
+ if (MFI.hasCalls())
+ report_fatal_error("call in graphics shader with too many input SGPRs");
+
+ for (unsigned Reg : AMDGPU::SGPR_32RegClass) {
+ if (!MRI.isLiveIn(Reg)) {
+ Info.setStackPtrOffsetReg(Reg);
+ break;
+ }
+ }
+
+ if (Info.getStackPtrOffsetReg() == AMDGPU::SP_REG)
+ report_fatal_error("failed to find register for SP");
}
+
+ Info.setScratchWaveOffsetReg(ReservedOffsetReg);
+ Info.setFrameOffsetReg(ReservedOffsetReg);
+ } else if (RequiresStackAccess) {
+ assert(!MFI.hasCalls());
+ // We know there are accesses and they will be done relative to SP, so just
+ // pin it to the input.
+ //
+ // FIXME: Should not do this if inline asm is reading/writing these
+ // registers.
+ unsigned PreloadedSP = Info.getPreloadedReg(
+ AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
+
+ Info.setStackPtrOffsetReg(PreloadedSP);
+ Info.setScratchWaveOffsetReg(PreloadedSP);
+ Info.setFrameOffsetReg(PreloadedSP);
+ } else {
+ assert(!MFI.hasCalls());
+
+ // There may not be stack access at all. There may still be spills, or
+ // access of a constant pointer (in which cases an extra copy will be
+ // emitted in the prolog).
+ unsigned ReservedOffsetReg
+ = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF);
+ Info.setStackPtrOffsetReg(ReservedOffsetReg);
+ Info.setScratchWaveOffsetReg(ReservedOffsetReg);
+ Info.setFrameOffsetReg(ReservedOffsetReg);
}
}
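
The hunk above introduces a fallback scan over the SGPR_32 class when s32 is already consumed by a live-in input argument. Below is a minimal standalone sketch of that selection policy, not part of the commit: RegisterInfo, the register numbering, and pickStackPtr are simplified stand-ins assumed purely for illustration, not the real MachineRegisterInfo or AMDGPU register definitions.

// Sketch (not part of the commit) of the SP selection policy implemented
// above: prefer SGPR32, otherwise take the first scalar register that is
// not already occupied by a live-in argument.
#include <cstdio>
#include <optional>
#include <set>

namespace sketch {

constexpr unsigned NumSGPRs = 106;   // assumption: SGPR0..SGPR105
constexpr unsigned PreferredSP = 32; // mirrors AMDGPU::SGPR32

// Stand-in for MachineRegisterInfo::isLiveIn.
struct RegisterInfo {
  std::set<unsigned> LiveIns;
  bool isLiveIn(unsigned Reg) const { return LiveIns.count(Reg) != 0; }
};

// Pick a stack pointer register: SGPR32 if free, otherwise the first
// non-live-in SGPR. Returning nullopt corresponds to the
// report_fatal_error("failed to find register for SP") path above.
std::optional<unsigned> pickStackPtr(const RegisterInfo &MRI) {
  if (!MRI.isLiveIn(PreferredSP))
    return PreferredSP;
  for (unsigned Reg = 0; Reg != NumSGPRs; ++Reg)
    if (!MRI.isLiveIn(Reg))
      return Reg;
  return std::nullopt;
}

} // namespace sketch

int main() {
  sketch::RegisterInfo MRI;
  for (unsigned Reg = 0; Reg <= 40; ++Reg)
    MRI.LiveIns.insert(Reg); // pretend SGPR0..SGPR40 hold shader inputs
  if (auto SP = sketch::pickStackPtr(MRI))
    std::printf("selected SGPR%u as the stack pointer\n", *SP);
  return 0;
}
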
@@ -9939,7 +9960,6 @@ SITargetLowering::getConstraintType(StringRef Constraint) const {
void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
if (Info->isEntryFunction()) {
@@ -9947,24 +9967,10 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
}
- // We have to assume the SP is needed in case there are calls in the function
- // during lowering. Calls are only detected after the function is
- // lowered. We're about to reserve registers, so don't bother using it if we
- // aren't really going to use it.
- bool NeedSP = !Info->isEntryFunction() ||
- MFI.hasVarSizedObjects() ||
- MFI.hasCalls();
-
- if (NeedSP) {
- unsigned ReservedStackPtrOffsetReg = TRI->reservedStackPtrOffsetReg(MF);
- Info->setStackPtrOffsetReg(ReservedStackPtrOffsetReg);
-
- assert(Info->getStackPtrOffsetReg() != Info->getFrameOffsetReg());
- assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
- Info->getStackPtrOffsetReg()));
- if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)
- MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
- }
+ assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
+ Info->getStackPtrOffsetReg()));
+ if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)
+ MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
// We need to worry about replacing the default register with itself in case
// of MIR testcases missing the MFI.
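
With this change, finalizeLowering performs the SP_REG replacement unconditionally once a concrete stack pointer has been chosen, rather than only when NeedSP was computed. The following is a minimal standalone sketch of that rewrite step, not part of the commit; Instr, replaceRegWith, and the placeholder constant are invented stand-ins for the MachineRegisterInfo API used above.

// Sketch (not part of the commit): a placeholder register used during
// lowering is rewritten to the concretely selected stack pointer.
#include <cstdio>
#include <vector>

namespace sketch {

constexpr unsigned SP_REG_PLACEHOLDER = 0xFFFF; // stand-in for AMDGPU::SP_REG

struct Instr {
  std::vector<unsigned> Operands; // register operands only, for brevity
};

// Stand-in for MachineRegisterInfo::replaceRegWith: rewrite every use of
// FromReg to ToReg across the function.
void replaceRegWith(std::vector<Instr> &Func, unsigned FromReg, unsigned ToReg) {
  for (Instr &MI : Func)
    for (unsigned &Op : MI.Operands)
      if (Op == FromReg)
        Op = ToReg;
}

} // namespace sketch

int main() {
  std::vector<sketch::Instr> Func = {
      {{sketch::SP_REG_PLACEHOLDER, 5}}, // e.g. a scratch access addressed off SP
      {{7, sketch::SP_REG_PLACEHOLDER}},
  };
  unsigned SelectedSP = 32; // the register picked in reservePrivateMemoryRegs
  // Only rewrite when a distinct register was chosen; replacing the
  // placeholder with itself would be the MIR-testcase concern noted above.
  if (SelectedSP != sketch::SP_REG_PLACEHOLDER)
    sketch::replaceRegWith(Func, sketch::SP_REG_PLACEHOLDER, SelectedSP);
  std::printf("first operand is now SGPR%u\n", Func[0].Operands[0]);
  return 0;
}
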