diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 52 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 21 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFrameLowering.h | 2 |
3 files changed, 41 insertions, 34 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 672e49184a5..0ea8db04c29 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1067,15 +1067,15 @@ bool GCNTargetMachine::parseMachineFunctionInfo( auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A, const TargetRegisterClass &RC, - ArgDescriptor &Arg) { + ArgDescriptor &Arg, unsigned UserSGPRs, + unsigned SystemSGPRs) { // Skip parsing if it's not present. if (!A) return false; if (A->IsRegister) { unsigned Reg; - if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, - Error)) { + if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) { SourceRange = A->RegisterName.SourceRange; return true; } @@ -1088,60 +1088,62 @@ bool GCNTargetMachine::parseMachineFunctionInfo( if (A->Mask) Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue()); + MFI->NumUserSGPRs += UserSGPRs; + MFI->NumSystemSGPRs += SystemSGPRs; return false; }; if (YamlMFI.ArgInfo && (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer, AMDGPU::SReg_128RegClass, - MFI->ArgInfo.PrivateSegmentBuffer) || + MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr, - AMDGPU::SReg_64RegClass, - MFI->ArgInfo.DispatchPtr) || + AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr, + 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.QueuePtr) || + MFI->ArgInfo.QueuePtr, 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.KernargSegmentPtr) || + MFI->ArgInfo.KernargSegmentPtr, 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID, - AMDGPU::SReg_64RegClass, - MFI->ArgInfo.DispatchID) || + AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID, + 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.FlatScratchInit) || + MFI->ArgInfo.FlatScratchInit, 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize, AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.PrivateSegmentSize) || + MFI->ArgInfo.PrivateSegmentSize, 0, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX, - AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.WorkGroupIDX) || + AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX, + 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY, - AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.WorkGroupIDY) || + AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY, + 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ, - AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.WorkGroupIDZ) || + AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ, + 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo, AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.WorkGroupInfo) || + MFI->ArgInfo.WorkGroupInfo, 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset, AMDGPU::SGPR_32RegClass, - MFI->ArgInfo.PrivateSegmentWaveByteOffset) || + MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) || parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.ImplicitArgPtr) || + MFI->ArgInfo.ImplicitArgPtr, 0, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr, AMDGPU::SReg_64RegClass, - MFI->ArgInfo.ImplicitBufferPtr) || + MFI->ArgInfo.ImplicitBufferPtr, 2, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX, AMDGPU::VGPR_32RegClass, - MFI->ArgInfo.WorkItemIDX) || + MFI->ArgInfo.WorkItemIDX, 0, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY, AMDGPU::VGPR_32RegClass, - MFI->ArgInfo.WorkItemIDY) || + MFI->ArgInfo.WorkItemIDY, 0, 0) || parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ, AMDGPU::VGPR_32RegClass, - MFI->ArgInfo.WorkItemIDZ))) + MFI->ArgInfo.WorkItemIDZ, 0, 0))) return true; MFI->Mode.IEEE = YamlMFI.Mode.IEEE; diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 44647d8ba87..feab6bed260 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -311,7 +311,8 @@ unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg( } // Shift down registers reserved for the scratch wave offset. -unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( +std::pair<unsigned, bool> +SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const { MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -322,17 +323,17 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( // No replacement necessary. if (ScratchWaveOffsetReg == AMDGPU::NoRegister || (!hasFP(MF) && !MRI.isPhysRegUsed(ScratchWaveOffsetReg))) { - return AMDGPU::NoRegister; + return std::make_pair(AMDGPU::NoRegister, false); } if (ST.hasSGPRInitBug()) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, false); unsigned NumPreloaded = MFI->getNumPreloadedSGPRs(); ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(ST, MF); if (NumPreloaded > AllSGPRs.size()) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, false); AllSGPRs = AllSGPRs.slice(NumPreloaded); @@ -353,10 +354,11 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( unsigned ReservedRegCount = 13; if (AllSGPRs.size() < ReservedRegCount) - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, false); bool HandledScratchWaveOffsetReg = ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); + bool FPAdjusted = false; for (MCPhysReg Reg : AllSGPRs.drop_back(ReservedRegCount)) { // Pick the first unallocated SGPR. Be careful not to pick an alias of the @@ -374,12 +376,13 @@ unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg( MFI->setScratchWaveOffsetReg(Reg); MFI->setFrameOffsetReg(Reg); ScratchWaveOffsetReg = Reg; + FPAdjusted = true; break; } } } - return ScratchWaveOffsetReg; + return std::make_pair(ScratchWaveOffsetReg, FPAdjusted); } void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, @@ -415,7 +418,9 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, unsigned ScratchRsrcReg = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF); - unsigned ScratchWaveOffsetReg = + unsigned ScratchWaveOffsetReg; + bool FPAdjusted; + std::tie(ScratchWaveOffsetReg, FPAdjusted) = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF); // We need to insert initialization of the scratch resource descriptor. @@ -453,7 +458,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, if (&OtherBB == &MBB) continue; - if (OffsetRegUsed) + if (OffsetRegUsed || FPAdjusted) OtherBB.addLiveIn(ScratchWaveOffsetReg); if (ResourceRegUsed) diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index 19543287148..c644f4726e2 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -66,7 +66,7 @@ private: SIMachineFunctionInfo *MFI, MachineFunction &MF) const; - unsigned getReservedPrivateSegmentWaveByteOffsetReg( + std::pair<unsigned, bool> getReservedPrivateSegmentWaveByteOffsetReg( const GCNSubtarget &ST, const SIInstrInfo *TII, const SIRegisterInfo *TRI, SIMachineFunctionInfo *MFI, MachineFunction &MF) const; |