From 2b913b1f493107ae7ffb6c11e59094952d595e1e Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 1 Feb 2017 22:59:50 +0000 Subject: [AMDGPU] Account workgroup size in LDS occupancy limits Functions matching LDS use to occupancy return results for a workgroup of 64 workitems. The numbers has to be adjusted for bigger workgroups. For example a workgroup of size 256 already occupies 4 waves just by itself. Given that all numbers of LDS use in the compiler are per workgroup, occupancy shall be multiplied by 4 in this case. Each 64 workitems still limited by the same number, but 4 subrgoups 64 workitems each can afford 4 times more LDS to get the same occupancy. In addition change initializes LDS size in the subtarget to a real value for SI+ targets. This is required since LDS size is a variable in these calculations. Differential Revision: https://reviews.llvm.org/D29423 llvm-svn: 293837 --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h') diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index f66ebd6afc2..83eda9bfbb6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -274,11 +274,12 @@ public: /// Return the amount of LDS that can be used that will not restrict the /// occupancy lower than WaveCount. - unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const; + unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, + const Function &) const; /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if /// the given LDS memory size is the only constraint. - unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const; + unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; bool hasFP16Denormals() const { return FP64FP16Denormals; -- cgit v1.2.3