diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll | 6 |
2 files changed, 9 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 02e47afeb91..0a1ab73d8dc 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -151,7 +151,11 @@ unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                                unsigned FlatWorkGroupSize) {
   if (!Features.test(FeatureGCN))
     return 8;
-  return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
+  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+  if (N == 1)
+    return 40;
+  N = 40 / N;
+  return std::min(N, 16u);
 }
 
 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
diff --git a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
index 71d21b120f0..93b33736b2a 100644
--- a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
@@ -69,7 +69,8 @@ entry:
 }
 
 ; ALL-LABEL: @occupancy_0(
-; ALL: alloca [5 x i32]
+; CI-NOT: alloca [5 x i32]
+; SI: alloca [5 x i32]
 define void @occupancy_0(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #3 {
 entry:
   %stack = alloca [5 x i32], align 4
@@ -91,7 +92,8 @@ entry:
 }
 
 ; ALL-LABEL: @occupancy_max(
-; ALL: alloca [5 x i32]
+; CI-NOT: alloca [5 x i32]
+; SI: alloca [5 x i32]
 define void @occupancy_max(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #4 {
 entry:
   %stack = alloca [5 x i32], align 4