diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 25 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 18 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 2 |
4 files changed, 19 insertions, 28 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 1eb9b83456c..716c3879478 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -591,25 +591,12 @@ unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const { } unsigned GCNSubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const { - if (VGPRs <= 24) - return 10; - if (VGPRs <= 28) - return 9; - if (VGPRs <= 32) - return 8; - if (VGPRs <= 36) - return 7; - if (VGPRs <= 40) - return 6; - if (VGPRs <= 48) - return 5; - if (VGPRs <= 64) - return 4; - if (VGPRs <= 84) - return 3; - if (VGPRs <= 128) - return 2; - return 1; + unsigned MaxWaves = getMaxWavesPerEU(); + unsigned Granule = getVGPRAllocGranule(); + if (VGPRs < Granule) + return MaxWaves; + unsigned RoundedRegs = ((VGPRs + Granule - 1) / Granule) * Granule; + return std::min(getTotalNumVGPRs() / RoundedRegs, MaxWaves); } unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index 78c3b823946..1f30d76de0d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -884,7 +884,7 @@ public: /// \returns Maximum number of waves per execution unit supported by the /// subtarget without any kind of limitation. unsigned getMaxWavesPerEU() const { - return AMDGPU::IsaInfo::getMaxWavesPerEU(); + return AMDGPU::IsaInfo::getMaxWavesPerEU(this); } /// \returns Number of waves per work group supported by the subtarget and diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index e90f40e6abe..ef50d3754dd 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -241,7 +241,7 @@ unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, } unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) { - return getMaxWavesPerEU() * getEUsPerCU(STI); + return getMaxWavesPerEU(STI) * getEUsPerCU(STI); } unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI, @@ -253,9 +253,11 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) { return 1; } -unsigned getMaxWavesPerEU() { +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) { // FIXME: Need to take scratch memory into account. - return 10; + if (!isGFX10(*STI)) + return 10; + return 20; } unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI, @@ -317,7 +319,7 @@ unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { if (Version.Major >= 10) return 0; - if (WavesPerEU >= getMaxWavesPerEU()) + if (WavesPerEU >= getMaxWavesPerEU(STI)) return 0; unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1); @@ -394,17 +396,19 @@ unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, } unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) { - return 256; + if (!isGFX10(*STI)) + return 256; + return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512; } unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) { - return getTotalNumVGPRs(STI); + return 256; } unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) { assert(WavesPerEU != 0); - if (WavesPerEU >= getMaxWavesPerEU()) + if (WavesPerEU >= getMaxWavesPerEU(STI)) return 0; unsigned MinNumVGPRs = alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1), diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 209ef7eef74..590df20b4c2 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -94,7 +94,7 @@ unsigned getMinWavesPerEU(const MCSubtargetInfo *STI); /// \returns Maximum number of waves per execution unit for given subtarget \p /// STI without any kind of limitation. -unsigned getMaxWavesPerEU(); +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI); /// \returns Maximum number of waves per execution unit for given subtarget \p /// STI and limited by given \p FlatWorkGroupSize. |