diff options
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 175 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.h | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 142 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 14 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 61 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 2 | 
9 files changed, 218 insertions, 209 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 59a27ab1401..2c104758047 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1361,10 +1361,10 @@ std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const      SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),                                                FI->getValueType(0)); -    // If we can resolve this to a frame index access, this is relative to the -    // frame pointer SGPR. -    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(), -                                                   MVT::i32)); +    // If we can resolve this to a frame index access, this will be relative to +    // either the stack or frame pointer SGPR. +    return std::make_pair( +        TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));    }    // If we don't know this private access is a local stack object, it needs to diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp index 60f23f7d778..140ca6e33fe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -83,6 +83,9 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,  }  unsigned SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const { +  const SIFrameLowering *TFI = +      MF.getSubtarget<GCNSubtarget>().getFrameLowering();    const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>(); -  return FuncInfo->getFrameOffsetReg(); +  return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() +                        : FuncInfo->getStackPtrOffsetReg();  } diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 4b2124b14c0..7f3150bdd01 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -164,34 +164,29 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(    return ScratchRsrcReg;  } -// Shift down registers reserved for the scratch wave offset and stack pointer -// SGPRs. -std::pair<unsigned, unsigned> -SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( -  const GCNSubtarget &ST, -  const SIInstrInfo *TII, -  const SIRegisterInfo *TRI, -  SIMachineFunctionInfo *MFI, -  MachineFunction &MF) const { +// Shift down registers reserved for the scratch wave offset. +unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( +    const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, +    SIMachineFunctionInfo *MFI, MachineFunction &MF) const {    MachineRegisterInfo &MRI = MF.getRegInfo();    unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); +  assert(MFI->isEntryFunction()); +    // No replacement necessary.    if (ScratchWaveOffsetReg == AMDGPU::NoRegister || -      !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) { -    assert(MFI->getStackPtrOffsetReg() == AMDGPU::SP_REG); -    return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister); +      (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) { +    return AMDGPU::NoRegister;    } -  unsigned SPReg = MFI->getStackPtrOffsetReg();    if (ST.hasSGPRInitBug()) -    return std::make_pair(ScratchWaveOffsetReg, SPReg); +    return ScratchWaveOffsetReg;    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();    ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);    if (NumPreloaded > AllSGPRs.size()) -    return std::make_pair(ScratchWaveOffsetReg, SPReg); +    return ScratchWaveOffsetReg;    AllSGPRs = AllSGPRs.slice(NumPreloaded); @@ -212,7 +207,7 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(    unsigned ReservedRegCount = 13;    if (AllSGPRs.size() < ReservedRegCount) -    return std::make_pair(ScratchWaveOffsetReg, SPReg); +    return ScratchWaveOffsetReg;    bool HandledScratchWaveOffsetReg =      ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); @@ -225,14 +220,20 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(          HandledScratchWaveOffsetReg = true;          MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); +        if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) { +          assert(!hasFP(MF)); +          MFI->setStackPtrOffsetReg(Reg); +        } +          MFI->setScratchWaveOffsetReg(Reg); +        MFI->setFrameOffsetReg(Reg);          ScratchWaveOffsetReg = Reg;          break;        }      }    } -  return std::make_pair(ScratchWaveOffsetReg, SPReg); +  return ScratchWaveOffsetReg;  }  void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, @@ -265,38 +266,11 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,    if (MFI->hasFlatScratchInit())      emitFlatScratchInit(ST, MF, MBB); -  unsigned SPReg = MFI->getStackPtrOffsetReg(); -  if (SPReg != AMDGPU::SP_REG) { -    assert(MRI.isReserved(SPReg) && "SPReg used but not reserved"); - -    DebugLoc DL; -    const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); -    int64_t StackSize = FrameInfo.getStackSize(); - -    if (StackSize == 0) { -      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg) -        .addReg(MFI->getScratchWaveOffsetReg()); -    } else { -      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg) -        .addReg(MFI->getScratchWaveOffsetReg()) -        .addImm(StackSize * ST.getWavefrontSize()); -    } -  } -    unsigned ScratchRsrcReg      = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); -  unsigned ScratchWaveOffsetReg; -  std::tie(ScratchWaveOffsetReg, SPReg) -    = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); - -  // It's possible to have uses of only ScratchWaveOffsetReg without -  // ScratchRsrcReg if it's only used for the initialization of flat_scratch, -  // but the inverse is not true. -  if (ScratchWaveOffsetReg == AMDGPU::NoRegister) { -    assert(ScratchRsrcReg == AMDGPU::NoRegister); -    return; -  } +  unsigned ScratchWaveOffsetReg = +      getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);    // We need to insert initialization of the scratch resource descriptor.    unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( @@ -308,18 +282,19 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,        AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);    } -  bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg); +  bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister && +                       MRI.isPhysRegUsed(ScratchWaveOffsetReg);    bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&                           MRI.isPhysRegUsed(ScratchRsrcReg); +  // FIXME: Hack to not crash in situations which emitted an error. +  if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister) +    return; +    // We added live-ins during argument lowering, but since they were not used    // they were deleted. We're adding the uses now, so add them back. -  if (OffsetRegUsed) { -    assert(PreloadedScratchWaveOffsetReg != AMDGPU::NoRegister && -           "scratch wave offset input is required"); -    MRI.addLiveIn(PreloadedScratchWaveOffsetReg); -    MBB.addLiveIn(PreloadedScratchWaveOffsetReg); -  } +  MRI.addLiveIn(PreloadedScratchWaveOffsetReg); +  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);    if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {      assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F)); @@ -360,11 +335,16 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,        .addReg(PreloadedPrivateBufferReg, RegState::Kill);    } -  if (OffsetRegUsed && -      PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) { +  unsigned SPReg = MFI->getStackPtrOffsetReg(); +  assert(SPReg != AMDGPU::SP_REG); + +  // FIXME: Remove the isPhysRegUsed checks +  const bool HasFP = hasFP(MF); + +  if (HasFP || OffsetRegUsed) { +    assert(ScratchWaveOffsetReg);      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg) -      .addReg(PreloadedScratchWaveOffsetReg, -              MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill); +      .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);    }    if (CopyBuffer && !CopyBufferFirst) { @@ -372,9 +352,26 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,        .addReg(PreloadedPrivateBufferReg, RegState::Kill);    } -  if (ResourceRegUsed) +  if (ResourceRegUsed) {      emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,          PreloadedPrivateBufferReg, ScratchRsrcReg); +  } + +  if (HasFP) { +    DebugLoc DL; +    const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); +    int64_t StackSize = FrameInfo.getStackSize(); + +    // On kernel entry, the private scratch wave offset is the SP value. +    if (StackSize == 0) { +      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg) +        .addReg(MFI->getScratchWaveOffsetReg()); +    } else { +      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg) +        .addReg(MFI->getScratchWaveOffsetReg()) +        .addImm(StackSize * ST.getWavefrontSize()); +    } +  }  }  // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set. @@ -567,15 +564,12 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,    MachineBasicBlock::iterator MBBI = MBB.begin();    DebugLoc DL; -  // XXX - Is this the right predicate? - -  bool NeedFP = hasFP(MF); +  bool HasFP = false;    uint32_t NumBytes = MFI.getStackSize();    uint32_t RoundedSize = NumBytes; -  const bool NeedsRealignment = TRI.needsStackRealignment(MF); -  if (NeedsRealignment) { -    assert(NeedFP); +  if (TRI.needsStackRealignment(MF)) { +    HasFP = true;      const unsigned Alignment = MFI.getMaxAlignment();      RoundedSize += Alignment; @@ -599,7 +593,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,        .addImm(-Alignment * ST.getWavefrontSize())        .setMIFlag(MachineInstr::FrameSetup);      FuncInfo->setIsStackRealigned(true); -  } else if (NeedFP) { +  } else if ((HasFP = hasFP(MF))) {      // If we need a base pointer, set it up here. It's whatever the value of      // the stack pointer is at this point. Any variable size objects will be      // allocated after this, so we can still use the base pointer to reference @@ -609,7 +603,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,        .setMIFlag(MachineInstr::FrameSetup);    } -  if (RoundedSize != 0 && hasSP(MF)) { +  if (HasFP && RoundedSize != 0) {      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)        .addReg(StackPtrReg)        .addImm(RoundedSize * ST.getWavefrontSize()) @@ -693,23 +687,17 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,        .addReg(ScratchExecCopy);    } -  unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); -  if (StackPtrReg == AMDGPU::NoRegister) -    return; - -  const MachineFrameInfo &MFI = MF.getFrameInfo(); -  uint32_t NumBytes = MFI.getStackSize(); - -  // FIXME: Clarify distinction between no set SP and SP. For callee functions, -  // it's really whether we need SP to be accurate or not. - -  if (NumBytes != 0 && hasSP(MF)) { +  if (hasFP(MF)) { +    const MachineFrameInfo &MFI = MF.getFrameInfo(); +    uint32_t NumBytes = MFI.getStackSize();      uint32_t RoundedSize = FuncInfo->isStackRealigned() ?        NumBytes + MFI.getMaxAlignment() : NumBytes; +    const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)        .addReg(StackPtrReg) -      .addImm(RoundedSize * ST.getWavefrontSize()); +      .addImm(RoundedSize * ST.getWavefrontSize()) +      .setMIFlag(MachineInstr::FrameDestroy);    }  } @@ -849,18 +837,25 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(  }  bool SIFrameLowering::hasFP(const MachineFunction &MF) const { -  // All stack operations are relative to the frame offset SGPR. -  // TODO: Still want to eliminate sometimes.    const MachineFrameInfo &MFI = MF.getFrameInfo(); +  if (MFI.hasCalls()) { +    // All offsets are unsigned, so need to be addressed in the same direction +    // as stack growth. +    if (MFI.getStackSize() != 0) +      return true; + +    // For the entry point, the input wave scratch offset must be copied to the +    // API SP if there are calls. +    if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) +      return true; + +    // Retain behavior of always omitting the FP for leaf functions when +    // possible. +    if (MF.getTarget().Options.DisableFramePointerElim(MF)) +      return true; +  } -  // XXX - Is this only called after frame is finalized? Should be able to check -  // frame size. -  return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI); -} - -bool SIFrameLowering::hasSP(const MachineFunction &MF) const { -  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); -  // All stack operations are relative to the frame offset SGPR. -  const MachineFrameInfo &MFI = MF.getFrameInfo(); -  return MFI.hasCalls() || MFI.hasVarSizedObjects() || TRI->needsStackRealignment(MF); +  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || +    MFI.hasStackMap() || MFI.hasPatchPoint() || +    MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF);  } diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index c5b707cba06..a9e765aa36e 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -58,12 +58,9 @@ private:      SIMachineFunctionInfo *MFI,      MachineFunction &MF) const; -  std::pair<unsigned, unsigned> getReservedPrivateSegmentWaveByteOffsetReg( -    const GCNSubtarget &ST, -    const SIInstrInfo *TII, -    const SIRegisterInfo *TRI, -    SIMachineFunctionInfo *MFI, -    MachineFunction &MF) const; +  unsigned getReservedPrivateSegmentWaveByteOffsetReg( +      const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, +      SIMachineFunctionInfo *MFI, MachineFunction &MF) const;    // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set.    void emitEntryFunctionScratchSetup(const GCNSubtarget &ST, MachineFunction &MF, @@ -73,7 +70,6 @@ private:  public:    bool hasFP(const MachineFunction &MF) const override; -  bool hasSP(const MachineFunction &MF) const;  };  } // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1ca11da247e..8a08bc463da 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1770,6 +1770,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,    // should reserve the arguments and use them directly.    MachineFrameInfo &MFI = MF.getFrameInfo();    bool HasStackObjects = MFI.hasStackObjects(); +  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();    // Record that we know we have non-spill stack objects so we don't need to    // check all stack objects later. @@ -1785,65 +1786,85 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,    // the scratch registers to pass in.    bool RequiresStackAccess = HasStackObjects || MFI.hasCalls(); -  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); -  if (ST.isAmdHsaOrMesa(MF.getFunction())) { -    if (RequiresStackAccess) { -      // If we have stack objects, we unquestionably need the private buffer -      // resource. For the Code Object V2 ABI, this will be the first 4 user -      // SGPR inputs. We can reserve those and use them directly. - -      unsigned PrivateSegmentBufferReg = Info.getPreloadedReg( -        AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER); -      Info.setScratchRSrcReg(PrivateSegmentBufferReg); - -      if (MFI.hasCalls()) { -        // If we have calls, we need to keep the frame register in a register -        // that won't be clobbered by a call, so ensure it is copied somewhere. - -        // This is not a problem for the scratch wave offset, because the same -        // registers are reserved in all functions. - -        // FIXME: Nothing is really ensuring this is a call preserved register, -        // it's just selected from the end so it happens to be. -        unsigned ReservedOffsetReg -          = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF); -        Info.setScratchWaveOffsetReg(ReservedOffsetReg); -      } else { -        unsigned PrivateSegmentWaveByteOffsetReg = Info.getPreloadedReg( -          AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); -        Info.setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg); -      } -    } else { -      unsigned ReservedBufferReg -        = TRI.reservedPrivateSegmentBufferReg(MF); -      unsigned ReservedOffsetReg -        = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF); - -      // We tentatively reserve the last registers (skipping the last two -      // which may contain VCC). After register allocation, we'll replace -      // these with the ones immediately after those which were really -      // allocated. In the prologue copies will be inserted from the argument -      // to these reserved registers. -      Info.setScratchRSrcReg(ReservedBufferReg); -      Info.setScratchWaveOffsetReg(ReservedOffsetReg); -    } +  if (RequiresStackAccess && ST.isAmdHsaOrMesa(MF.getFunction())) { +    // If we have stack objects, we unquestionably need the private buffer +    // resource. For the Code Object V2 ABI, this will be the first 4 user +    // SGPR inputs. We can reserve those and use them directly. + +    unsigned PrivateSegmentBufferReg = +        Info.getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER); +    Info.setScratchRSrcReg(PrivateSegmentBufferReg);    } else {      unsigned ReservedBufferReg = TRI.reservedPrivateSegmentBufferReg(MF); +    // We tentatively reserve the last registers (skipping the last registers +    // which may contain VCC, FLAT_SCR, and XNACK). After register allocation, +    // we'll replace these with the ones immediately after those which were +    // really allocated. In the prologue copies will be inserted from the +    // argument to these reserved registers.      // Without HSA, relocations are used for the scratch pointer and the      // buffer resource setup is always inserted in the prologue. Scratch wave      // offset is still in an input SGPR.      Info.setScratchRSrcReg(ReservedBufferReg); +  } -    if (HasStackObjects && !MFI.hasCalls()) { -      unsigned ScratchWaveOffsetReg = Info.getPreloadedReg( -        AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); -      Info.setScratchWaveOffsetReg(ScratchWaveOffsetReg); +  // This should be accurate for kernels even before the frame is finalized. +  const bool HasFP = ST.getFrameLowering()->hasFP(MF); +  if (HasFP) { +    unsigned ReservedOffsetReg = +        TRI.reservedPrivateSegmentWaveByteOffsetReg(MF); +    MachineRegisterInfo &MRI = MF.getRegInfo(); + +    // Try to use s32 as the SP, but move it if it would interfere with input +    // arguments. This won't work with calls though. +    // +    // FIXME: Move SP to avoid any possible inputs, or find a way to spill input +    // registers. +    if (!MRI.isLiveIn(AMDGPU::SGPR32)) { +      Info.setStackPtrOffsetReg(AMDGPU::SGPR32);      } else { -      unsigned ReservedOffsetReg -        = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF); -      Info.setScratchWaveOffsetReg(ReservedOffsetReg); +      assert(AMDGPU::isShader(MF.getFunction().getCallingConv())); + +      if (MFI.hasCalls()) +        report_fatal_error("call in graphics shader with too many input SGPRs"); + +      for (unsigned Reg : AMDGPU::SGPR_32RegClass) { +        if (!MRI.isLiveIn(Reg)) { +          Info.setStackPtrOffsetReg(Reg); +          break; +        } +      } + +      if (Info.getStackPtrOffsetReg() == AMDGPU::SP_REG) +        report_fatal_error("failed to find register for SP");      } + +    Info.setScratchWaveOffsetReg(ReservedOffsetReg); +    Info.setFrameOffsetReg(ReservedOffsetReg); +  } else if (RequiresStackAccess) { +    assert(!MFI.hasCalls()); +    // We know there are accesses and they will be done relative to SP, so just +    // pin it to the input. +    // +    // FIXME: Should not do this if inline asm is reading/writing these +    // registers. +    unsigned PreloadedSP = Info.getPreloadedReg( +        AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); + +    Info.setStackPtrOffsetReg(PreloadedSP); +    Info.setScratchWaveOffsetReg(PreloadedSP); +    Info.setFrameOffsetReg(PreloadedSP); +  } else { +    assert(!MFI.hasCalls()); + +    // There may not be stack access at all. There may still be spills, or +    // access of a constant pointer (in which cases an extra copy will be +    // emitted in the prolog). +    unsigned ReservedOffsetReg +      = TRI.reservedPrivateSegmentWaveByteOffsetReg(MF); +    Info.setStackPtrOffsetReg(ReservedOffsetReg); +    Info.setScratchWaveOffsetReg(ReservedOffsetReg); +    Info.setFrameOffsetReg(ReservedOffsetReg);    }  } @@ -9939,7 +9960,6 @@ SITargetLowering::getConstraintType(StringRef Constraint) const {  void SITargetLowering::finalizeLowering(MachineFunction &MF) const {    MachineRegisterInfo &MRI = MF.getRegInfo();    SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); -  const MachineFrameInfo &MFI = MF.getFrameInfo();    const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();    if (Info->isEntryFunction()) { @@ -9947,24 +9967,10 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {      reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);    } -  // We have to assume the SP is needed in case there are calls in the function -  // during lowering. Calls are only detected after the function is -  // lowered. We're about to reserve registers, so don't bother using it if we -  // aren't really going to use it. -  bool NeedSP = !Info->isEntryFunction() || -    MFI.hasVarSizedObjects() || -    MFI.hasCalls(); - -  if (NeedSP) { -    unsigned ReservedStackPtrOffsetReg = TRI->reservedStackPtrOffsetReg(MF); -    Info->setStackPtrOffsetReg(ReservedStackPtrOffsetReg); - -    assert(Info->getStackPtrOffsetReg() != Info->getFrameOffsetReg()); -    assert(!TRI->isSubRegister(Info->getScratchRSrcReg(), -                               Info->getStackPtrOffsetReg())); -    if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG) -      MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg()); -  } +  assert(!TRI->isSubRegister(Info->getScratchRSrcReg(), +                             Info->getStackPtrOffsetReg())); +  if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG) +    MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());    // We need to worry about replacing the default register with itself in case    // of MIR testcases missing the MFI. diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 1c3c52ba02c..48257b01b86 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -928,7 +928,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,        .addFrameIndex(FrameIndex)               // addr        .addMemOperand(MMO)        .addReg(MFI->getScratchRSrcReg(), RegState::Implicit) -      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit); +      .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);      // Add the scratch resource registers as implicit uses because we may end up      // needing them, and need to ensure that the reserved registers are      // correctly handled. @@ -950,7 +950,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,      .addReg(SrcReg, getKillRegState(isKill)) // data      .addFrameIndex(FrameIndex)               // addr      .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc -    .addReg(MFI->getFrameOffsetReg())        // scratch_offset +    .addReg(MFI->getStackPtrOffsetReg())     // scratch_offset      .addImm(0)                               // offset      .addMemOperand(MMO);  } @@ -1032,7 +1032,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,        .addFrameIndex(FrameIndex) // addr        .addMemOperand(MMO)        .addReg(MFI->getScratchRSrcReg(), RegState::Implicit) -      .addReg(MFI->getFrameOffsetReg(), RegState::Implicit); +      .addReg(MFI->getStackPtrOffsetReg(), RegState::Implicit);      if (ST.hasScalarStores()) {        // m0 is used for offset to scalar stores if used to spill. @@ -1046,10 +1046,10 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,    unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);    BuildMI(MBB, MI, DL, get(Opcode), DestReg) -    .addFrameIndex(FrameIndex)        // vaddr -    .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc -    .addReg(MFI->getFrameOffsetReg()) // scratch_offset -    .addImm(0)                        // offset +    .addFrameIndex(FrameIndex)           // vaddr +    .addReg(MFI->getScratchRSrcReg())    // scratch_rsrc +    .addReg(MFI->getStackPtrOffsetReg()) // scratch_offset +    .addImm(0)                           // offset      .addMemOperand(MMO);  } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 7e09f41aa8d..bfe6182a7c1 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -444,7 +444,8 @@ public:    }    unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const { -    return ArgInfo.getPreloadedValue(Value).first->getRegister(); +    auto Arg = ArgInfo.getPreloadedValue(Value).first; +    return Arg ? Arg->getRegister() : 0;    }    unsigned getGITPtrHigh() const { @@ -486,6 +487,11 @@ public:      return FrameOffsetReg;    } +  void setFrameOffsetReg(unsigned Reg) { +    assert(Reg != 0 && "Should never be unset"); +    FrameOffsetReg = Reg; +  } +    void setStackPtrOffsetReg(unsigned Reg) {      assert(Reg != 0 && "Should never be unset");      StackPtrOffsetReg = Reg; @@ -502,8 +508,6 @@ public:    void setScratchWaveOffsetReg(unsigned Reg) {      assert(Reg != 0 && "Should never be unset");      ScratchWaveOffsetReg = Reg; -    if (isEntryFunction()) -      FrameOffsetReg = ScratchWaveOffsetReg;    }    unsigned getQueuePtrUserSGPR() const { diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 2e96b986667..520d5198c5f 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -138,11 +138,6 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(    return AMDGPU::SGPR_32RegClass.getRegister(Reg);  } -unsigned SIRegisterInfo::reservedStackPtrOffsetReg( -  const MachineFunction &MF) const { -  return AMDGPU::SGPR32; -} -  BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {    BitVector Reserved(getNumRegs()); @@ -718,6 +713,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,    if (SpillToSMEM && OnlyToVGPR)      return false; +  unsigned FrameReg = getFrameRegister(*MF); +    assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&                           SuperReg != MFI->getFrameOffsetReg() &&                           SuperReg != MFI->getScratchWaveOffsetReg())); @@ -777,11 +774,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,        int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);        if (Offset != 0) {          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg) -          .addReg(MFI->getFrameOffsetReg()) +          .addReg(FrameReg)            .addImm(Offset);        } else {          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) -          .addReg(MFI->getFrameOffsetReg()); +          .addReg(FrameReg);        }        BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp)) @@ -849,11 +846,11 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,          = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,                                     EltSize, MinAlign(Align, EltSize * i));        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE)) -        .addReg(TmpReg, RegState::Kill)    // src -        .addFrameIndex(Index)              // vaddr -        .addReg(MFI->getScratchRSrcReg())  // srrsrc -        .addReg(MFI->getFrameOffsetReg())  // soffset -        .addImm(i * 4)                     // offset +        .addReg(TmpReg, RegState::Kill)       // src +        .addFrameIndex(Index)                 // vaddr +        .addReg(MFI->getScratchRSrcReg())     // srrsrc +        .addReg(MFI->getStackPtrOffsetReg())  // soffset +        .addImm(i * 4)                        // offset          .addMemOperand(MMO);      }    } @@ -909,6 +906,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,    unsigned EltSize = 4;    unsigned ScalarLoadOp; +  unsigned FrameReg = getFrameRegister(*MF); +    const TargetRegisterClass *RC = getPhysRegClass(SuperReg);    if (SpillToSMEM && isSGPRClass(RC)) {      // XXX - if private_element_size is larger than 4 it might be useful to be @@ -940,11 +939,11 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,        int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);        if (Offset != 0) {          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg) -          .addReg(MFI->getFrameOffsetReg()) +          .addReg(FrameReg)            .addImm(Offset);        } else {          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg) -          .addReg(MFI->getFrameOffsetReg()); +          .addReg(FrameReg);        }        auto MIB = @@ -988,10 +987,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,          MinAlign(Align, EltSize * i));        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg) -        .addFrameIndex(Index)              // vaddr -        .addReg(MFI->getScratchRSrcReg())  // srsrc -        .addReg(MFI->getFrameOffsetReg())  // soffset -        .addImm(i * 4)                     // offset +        .addFrameIndex(Index)                 // vaddr +        .addReg(MFI->getScratchRSrcReg())     // srsrc +        .addReg(MFI->getStackPtrOffsetReg())  // soffset +        .addImm(i * 4)                        // offset          .addMemOperand(MMO);        auto MIB = @@ -1056,6 +1055,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,    MachineOperand &FIOp = MI->getOperand(FIOperandNum);    int Index = MI->getOperand(FIOperandNum).getIndex(); +  unsigned FrameReg = getFrameRegister(*MF); +    switch (MI->getOpcode()) {      // SGPR register spill      case AMDGPU::SI_SPILL_S512_SAVE: @@ -1091,11 +1092,14 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,      case AMDGPU::SI_SPILL_V32_SAVE: {        const MachineOperand *VData = TII->getNamedOperand(*MI,                                                           AMDGPU::OpName::vdata); +      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == +             MFI->getStackPtrOffsetReg()); +        buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,              Index,              VData->getReg(), VData->isKill(),              TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(), -            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(), +            FrameReg,              TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),              *MI->memoperands_begin(),              RS); @@ -1112,12 +1116,14 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,      case AMDGPU::SI_SPILL_V512_RESTORE: {        const MachineOperand *VData = TII->getNamedOperand(*MI,                                                           AMDGPU::OpName::vdata); +      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == +             MFI->getStackPtrOffsetReg());        buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,              Index,              VData->getReg(), VData->isKill(),              TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(), -            TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(), +            FrameReg,              TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),              *MI->memoperands_begin(),              RS); @@ -1129,13 +1135,12 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,        const DebugLoc &DL = MI->getDebugLoc();        bool IsMUBUF = TII->isMUBUF(*MI); -      if (!IsMUBUF && -          MFI->getFrameOffsetReg() != MFI->getScratchWaveOffsetReg()) { +      if (!IsMUBUF && !MFI->isEntryFunction()) {          // Convert to an absolute stack address by finding the offset from the          // scratch wave base and scaling by the wave size.          // -        // In an entry function/kernel the stack address is already the -        // absolute address relative to the scratch wave offset. +        // In an entry function/kernel the offset is already the absolute +        // address relative to the frame register.          unsigned DiffReg            = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); @@ -1146,7 +1151,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,            MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);          BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), DiffReg) -          .addReg(MFI->getFrameOffsetReg()) +          .addReg(FrameReg)            .addReg(MFI->getScratchWaveOffsetReg());          int64_t Offset = FrameInfo.getObjectOffset(Index); @@ -1196,8 +1201,10 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,                 AMDGPU::getNamedOperandIdx(MI->getOpcode(),                                            AMDGPU::OpName::vaddr)); -        assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() -               == MFI->getFrameOffsetReg()); +        assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() == +               MFI->getStackPtrOffsetReg()); + +        TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->setReg(FrameReg);          int64_t Offset = FrameInfo.getObjectOffset(Index);          int64_t OldImm diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index de10e92c965..9780824683b 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -56,8 +56,6 @@ public:    unsigned reservedPrivateSegmentWaveByteOffsetReg(      const MachineFunction &MF) const; -  unsigned reservedStackPtrOffsetReg(const MachineFunction &MF) const; -    BitVector getReservedRegs(const MachineFunction &MF) const override;    const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;  | 

