diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIFrameLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 175 | 
1 files changed, 85 insertions, 90 deletions
| diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 4b2124b14c0..7f3150bdd01 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -164,34 +164,29 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(    return ScratchRsrcReg;  } -// Shift down registers reserved for the scratch wave offset and stack pointer -// SGPRs. -std::pair<unsigned, unsigned> -SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( -  const GCNSubtarget &ST, -  const SIInstrInfo *TII, -  const SIRegisterInfo *TRI, -  SIMachineFunctionInfo *MFI, -  MachineFunction &MF) const { +// Shift down registers reserved for the scratch wave offset. +unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( +    const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, +    SIMachineFunctionInfo *MFI, MachineFunction &MF) const {    MachineRegisterInfo &MRI = MF.getRegInfo();    unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); +  assert(MFI->isEntryFunction()); +    // No replacement necessary.    if (ScratchWaveOffsetReg == AMDGPU::NoRegister || -      !MRI.isPhysRegUsed(ScratchWaveOffsetReg)) { -    assert(MFI->getStackPtrOffsetReg() == AMDGPU::SP_REG); -    return std::make_pair(AMDGPU::NoRegister, AMDGPU::NoRegister); +      (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) { +    return AMDGPU::NoRegister;    } -  unsigned SPReg = MFI->getStackPtrOffsetReg();    if (ST.hasSGPRInitBug()) -    return std::make_pair(ScratchWaveOffsetReg, SPReg); +    return ScratchWaveOffsetReg;    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();    ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF);    if (NumPreloaded > AllSGPRs.size()) -    return std::make_pair(ScratchWaveOffsetReg, SPReg); +    return ScratchWaveOffsetReg;    AllSGPRs = AllSGPRs.slice(NumPreloaded); @@ -212,7 +207,7 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(    unsigned ReservedRegCount = 13;    if (AllSGPRs.size() < ReservedRegCount) -    return std::make_pair(ScratchWaveOffsetReg, SPReg); +    return ScratchWaveOffsetReg;    bool HandledScratchWaveOffsetReg =      ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); @@ -225,14 +220,20 @@ SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(          HandledScratchWaveOffsetReg = true;          MRI.replaceRegWith(ScratchWaveOffsetReg, Reg); +        if (MFI->getScratchWaveOffsetReg() == MFI->getStackPtrOffsetReg()) { +          assert(!hasFP(MF)); +          MFI->setStackPtrOffsetReg(Reg); +        } +          MFI->setScratchWaveOffsetReg(Reg); +        MFI->setFrameOffsetReg(Reg);          ScratchWaveOffsetReg = Reg;          break;        }      }    } -  return std::make_pair(ScratchWaveOffsetReg, SPReg); +  return ScratchWaveOffsetReg;  }  void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, @@ -265,38 +266,11 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,    if (MFI->hasFlatScratchInit())      emitFlatScratchInit(ST, MF, MBB); -  unsigned SPReg = MFI->getStackPtrOffsetReg(); -  if (SPReg != AMDGPU::SP_REG) { -    assert(MRI.isReserved(SPReg) && "SPReg used but not reserved"); - -    DebugLoc DL; -    const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); -    int64_t StackSize = FrameInfo.getStackSize(); - -    if (StackSize == 0) { -      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::COPY), SPReg) -        .addReg(MFI->getScratchWaveOffsetReg()); -    } else { -      BuildMI(MBB, MBB.begin(), DL, TII->get(AMDGPU::S_ADD_U32), SPReg) -        .addReg(MFI->getScratchWaveOffsetReg()) -        .addImm(StackSize * ST.getWavefrontSize()); -    } -  } -    unsigned ScratchRsrcReg      = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); -  unsigned ScratchWaveOffsetReg; -  std::tie(ScratchWaveOffsetReg, SPReg) -    = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); - -  // It's possible to have uses of only ScratchWaveOffsetReg without -  // ScratchRsrcReg if it's only used for the initialization of flat_scratch, -  // but the inverse is not true. -  if (ScratchWaveOffsetReg == AMDGPU::NoRegister) { -    assert(ScratchRsrcReg == AMDGPU::NoRegister); -    return; -  } +  unsigned ScratchWaveOffsetReg = +      getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);    // We need to insert initialization of the scratch resource descriptor.    unsigned PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( @@ -308,18 +282,19 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,        AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);    } -  bool OffsetRegUsed = MRI.isPhysRegUsed(ScratchWaveOffsetReg); +  bool OffsetRegUsed = ScratchWaveOffsetReg != AMDGPU::NoRegister && +                       MRI.isPhysRegUsed(ScratchWaveOffsetReg);    bool ResourceRegUsed = ScratchRsrcReg != AMDGPU::NoRegister &&                           MRI.isPhysRegUsed(ScratchRsrcReg); +  // FIXME: Hack to not crash in situations which emitted an error. +  if (PreloadedScratchWaveOffsetReg == AMDGPU::NoRegister) +    return; +    // We added live-ins during argument lowering, but since they were not used    // they were deleted. We're adding the uses now, so add them back. -  if (OffsetRegUsed) { -    assert(PreloadedScratchWaveOffsetReg != AMDGPU::NoRegister && -           "scratch wave offset input is required"); -    MRI.addLiveIn(PreloadedScratchWaveOffsetReg); -    MBB.addLiveIn(PreloadedScratchWaveOffsetReg); -  } +  MRI.addLiveIn(PreloadedScratchWaveOffsetReg); +  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);    if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {      assert(ST.isAmdHsaOrMesa(F) || ST.isMesaGfxShader(F)); @@ -360,11 +335,16 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,        .addReg(PreloadedPrivateBufferReg, RegState::Kill);    } -  if (OffsetRegUsed && -      PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) { +  unsigned SPReg = MFI->getStackPtrOffsetReg(); +  assert(SPReg != AMDGPU::SP_REG); + +  // FIXME: Remove the isPhysRegUsed checks +  const bool HasFP = hasFP(MF); + +  if (HasFP || OffsetRegUsed) { +    assert(ScratchWaveOffsetReg);      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg) -      .addReg(PreloadedScratchWaveOffsetReg, -              MRI.isPhysRegUsed(ScratchWaveOffsetReg) ? 0 : RegState::Kill); +      .addReg(PreloadedScratchWaveOffsetReg, HasFP ? RegState::Kill : 0);    }    if (CopyBuffer && !CopyBufferFirst) { @@ -372,9 +352,26 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,        .addReg(PreloadedPrivateBufferReg, RegState::Kill);    } -  if (ResourceRegUsed) +  if (ResourceRegUsed) {      emitEntryFunctionScratchSetup(ST, MF, MBB, MFI, I,          PreloadedPrivateBufferReg, ScratchRsrcReg); +  } + +  if (HasFP) { +    DebugLoc DL; +    const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); +    int64_t StackSize = FrameInfo.getStackSize(); + +    // On kernel entry, the private scratch wave offset is the SP value. +    if (StackSize == 0) { +      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), SPReg) +        .addReg(MFI->getScratchWaveOffsetReg()); +    } else { +      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), SPReg) +        .addReg(MFI->getScratchWaveOffsetReg()) +        .addImm(StackSize * ST.getWavefrontSize()); +    } +  }  }  // Emit scratch setup code for AMDPAL or Mesa, assuming ResourceRegUsed is set. @@ -567,15 +564,12 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,    MachineBasicBlock::iterator MBBI = MBB.begin();    DebugLoc DL; -  // XXX - Is this the right predicate? - -  bool NeedFP = hasFP(MF); +  bool HasFP = false;    uint32_t NumBytes = MFI.getStackSize();    uint32_t RoundedSize = NumBytes; -  const bool NeedsRealignment = TRI.needsStackRealignment(MF); -  if (NeedsRealignment) { -    assert(NeedFP); +  if (TRI.needsStackRealignment(MF)) { +    HasFP = true;      const unsigned Alignment = MFI.getMaxAlignment();      RoundedSize += Alignment; @@ -599,7 +593,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,        .addImm(-Alignment * ST.getWavefrontSize())        .setMIFlag(MachineInstr::FrameSetup);      FuncInfo->setIsStackRealigned(true); -  } else if (NeedFP) { +  } else if ((HasFP = hasFP(MF))) {      // If we need a base pointer, set it up here. It's whatever the value of      // the stack pointer is at this point. Any variable size objects will be      // allocated after this, so we can still use the base pointer to reference @@ -609,7 +603,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,        .setMIFlag(MachineInstr::FrameSetup);    } -  if (RoundedSize != 0 && hasSP(MF)) { +  if (HasFP && RoundedSize != 0) {      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_U32), StackPtrReg)        .addReg(StackPtrReg)        .addImm(RoundedSize * ST.getWavefrontSize()) @@ -693,23 +687,17 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,        .addReg(ScratchExecCopy);    } -  unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg(); -  if (StackPtrReg == AMDGPU::NoRegister) -    return; - -  const MachineFrameInfo &MFI = MF.getFrameInfo(); -  uint32_t NumBytes = MFI.getStackSize(); - -  // FIXME: Clarify distinction between no set SP and SP. For callee functions, -  // it's really whether we need SP to be accurate or not. - -  if (NumBytes != 0 && hasSP(MF)) { +  if (hasFP(MF)) { +    const MachineFrameInfo &MFI = MF.getFrameInfo(); +    uint32_t NumBytes = MFI.getStackSize();      uint32_t RoundedSize = FuncInfo->isStackRealigned() ?        NumBytes + MFI.getMaxAlignment() : NumBytes; +    const unsigned StackPtrReg = FuncInfo->getStackPtrOffsetReg();      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_SUB_U32), StackPtrReg)        .addReg(StackPtrReg) -      .addImm(RoundedSize * ST.getWavefrontSize()); +      .addImm(RoundedSize * ST.getWavefrontSize()) +      .setMIFlag(MachineInstr::FrameDestroy);    }  } @@ -849,18 +837,25 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(  }  bool SIFrameLowering::hasFP(const MachineFunction &MF) const { -  // All stack operations are relative to the frame offset SGPR. -  // TODO: Still want to eliminate sometimes.    const MachineFrameInfo &MFI = MF.getFrameInfo(); +  if (MFI.hasCalls()) { +    // All offsets are unsigned, so need to be addressed in the same direction +    // as stack growth. +    if (MFI.getStackSize() != 0) +      return true; + +    // For the entry point, the input wave scratch offset must be copied to the +    // API SP if there are calls. +    if (MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) +      return true; + +    // Retain behavior of always omitting the FP for leaf functions when +    // possible. +    if (MF.getTarget().Options.DisableFramePointerElim(MF)) +      return true; +  } -  // XXX - Is this only called after frame is finalized? Should be able to check -  // frame size. -  return MFI.hasStackObjects() && !allStackObjectsAreDead(MFI); -} - -bool SIFrameLowering::hasSP(const MachineFunction &MF) const { -  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo(); -  // All stack operations are relative to the frame offset SGPR. -  const MachineFrameInfo &MFI = MF.getFrameInfo(); -  return MFI.hasCalls() || MFI.hasVarSizedObjects() || TRI->needsStackRealignment(MF); +  return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() || +    MFI.hasStackMap() || MFI.hasPatchPoint() || +    MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->needsStackRealignment(MF);  } | 

