diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 178 |
1 files changed, 173 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 53417fc6d76..78b451f9847 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPUSubtarget.h"
+#include "SIMachineFunctionInfo.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/Target/TargetFrameLowering.h"
@@ -322,12 +323,179 @@ unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
   return 1;
 }
 
-unsigned SISubtarget::getMaxNumSGPRs() const {
+unsigned SISubtarget::getMinNumSGPRs(unsigned WavesPerEU) const {
+  if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+    switch (WavesPerEU) {
+      case 0: return 0;
+      case 10: return 0;
+      case 9: return 0;
+      case 8: return 81;
+      default: return 97;
+    }
+  } else {
+    switch (WavesPerEU) {
+      case 0: return 0;
+      case 10: return 0;
+      case 9: return 49;
+      case 8: return 57;
+      case 7: return 65;
+      case 6: return 73;
+      case 5: return 81;
+      default: return 97;
+    }
+  }
+}
+
+unsigned SISubtarget::getMaxNumSGPRs(unsigned WavesPerEU,
+                                     bool Addressable) const {
+  if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+    switch (WavesPerEU) {
+      case 0: return 80;
+      case 10: return 80;
+      case 9: return 80;
+      case 8: return 96;
+      default: return Addressable ? getAddressableNumSGPRs() : 112;
+    }
+  } else {
+    switch (WavesPerEU) {
+      case 0: return 48;
+      case 10: return 48;
+      case 9: return 56;
+      case 8: return 64;
+      case 7: return 72;
+      case 6: return 80;
+      case 5: return 96;
+      default: return getAddressableNumSGPRs();
+    }
+  }
+}
+
+unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+  if (MFI.hasFlatScratchInit()) {
+    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+      return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
+    if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
+      return 4; // FLAT_SCRATCH, VCC (in that order).
+  }
+
+  if (isXNACKEnabled())
+    return 4; // XNACK, VCC (in that order).
+  return 2; // VCC.
+}
+
+unsigned SISubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
+  const Function &F = *MF.getFunction();
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+
+  // Compute maximum number of SGPRs function can use using default/requested
+  // minimum number of waves per execution unit.
+  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
+  unsigned MaxNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, false);
+  unsigned MaxAddressableNumSGPRs = getMaxNumSGPRs(WavesPerEU.first, true);
+
+  // Check if maximum number of SGPRs was explicitly requested using
+  // "amdgpu-num-sgpr" attribute.
+  if (F.hasFnAttribute("amdgpu-num-sgpr")) {
+    unsigned Requested = AMDGPU::getIntegerAttribute(
+      F, "amdgpu-num-sgpr", MaxNumSGPRs);
+
+    // Make sure requested value does not violate subtarget's specifications.
+    if (Requested && (Requested <= getReservedNumSGPRs(MF)))
+      Requested = 0;
+
+    // If more SGPRs are required to support the input user/system SGPRs,
+    // increase to accommodate them.
+    //
+    // FIXME: This really ends up using the requested number of SGPRs + number
+    // of reserved special registers in total. Theoretically you could re-use
+    // the last input registers for these special registers, but this would
+    // require a lot of complexity to deal with the weird aliasing.
+    unsigned InputNumSGPRs = MFI.getNumPreloadedSGPRs();
+    if (Requested && Requested < InputNumSGPRs)
+      Requested = InputNumSGPRs;
+
+    // Make sure requested value is compatible with values implied by
+    // default/requested minimum/maximum number of waves per execution unit.
+    if (Requested && Requested > getMaxNumSGPRs(WavesPerEU.first, false))
+      Requested = 0;
+    if (WavesPerEU.second &&
+        Requested && Requested < getMinNumSGPRs(WavesPerEU.second))
+      Requested = 0;
+
+    if (Requested)
+      MaxNumSGPRs = Requested;
+  }
+
   if (hasSGPRInitBug())
-    return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+    MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+
+  return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
+                  MaxAddressableNumSGPRs);
+}
+
+unsigned SISubtarget::getMinNumVGPRs(unsigned WavesPerEU) const {
+  switch (WavesPerEU) {
+    case 0: return 0;
+    case 10: return 0;
+    case 9: return 25;
+    case 8: return 29;
+    case 7: return 33;
+    case 6: return 37;
+    case 5: return 41;
+    case 4: return 49;
+    case 3: return 65;
+    case 2: return 85;
+    default: return 129;
+  }
+}
 
-  if (getGeneration() >= VOLCANIC_ISLANDS)
-    return 102;
+unsigned SISubtarget::getMaxNumVGPRs(unsigned WavesPerEU) const {
+  switch (WavesPerEU) {
+    case 0: return 24;
+    case 10: return 24;
+    case 9: return 28;
+    case 8: return 32;
+    case 7: return 36;
+    case 6: return 40;
+    case 5: return 48;
+    case 4: return 64;
+    case 3: return 84;
+    case 2: return 128;
+    default: return getTotalNumVGPRs();
+  }
+}
+
+unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
+  const Function &F = *MF.getFunction();
+  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+
+  // Compute maximum number of VGPRs function can use using default/requested
+  // minimum number of waves per execution unit.
+  std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
+  unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
+
+  // Check if maximum number of VGPRs was explicitly requested using
+  // "amdgpu-num-vgpr" attribute.
+  if (F.hasFnAttribute("amdgpu-num-vgpr")) {
+    unsigned Requested = AMDGPU::getIntegerAttribute(
+      F, "amdgpu-num-vgpr", MaxNumVGPRs);
+
+    // Make sure requested value does not violate subtarget's specifications.
+    if (Requested && Requested <= getReservedNumVGPRs(MF))
+      Requested = 0;
+
+    // Make sure requested value is compatible with values implied by
+    // default/requested minimum/maximum number of waves per execution unit.
+    if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
+      Requested = 0;
+    if (WavesPerEU.second &&
+        Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
+      Requested = 0;
+
+    if (Requested)
+      MaxNumVGPRs = Requested;
+  }
 
-  return 104;
+  return MaxNumVGPRs - getReservedNumVGPRs(MF);
 }
|

