AMDGPU: Correct maximum possible private allocation size

We were assuming a much larger per-wave visible stack allocation than is actually possible (see https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/faa3ae51388517353afcdaf9c16621f879ef0a59/src/core/runtime/amd_gpu_agent.cpp#L70). Based on this, we can assume the high 15 bits of a frame index or sret are 0. The frame index value is the per-lane offset, so the maximum frame index value is MAX_WAVE_SCRATCH / wavesize. Remove the corresponding subtarget feature and option that made this configurable. llvm-svn: 361541
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-05-23 19:38:14 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-05-23 19:38:14 +0000
commit: 5c714cbdd83166e10b27b8e5ea2700654da2e90b (patch)
tree: 5f0da14a93817c94f6dcef9d2c2c2cbc9af41986 /llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
parent: e4b27869c60cb1311c4396e057c21573e19e62cd (diff)
download: bcm5719-llvm-5c714cbdd83166e10b27b8e5ea2700654da2e90b.tar.gz
bcm5719-llvm-5c714cbdd83166e10b27b8e5ea2700654da2e90b.zip
1 file changed, 21 insertions, 10 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll b/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
index 8e54dcbd169..dfd75235f80 100644
--- a/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
@@ -1,31 +1,42 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
-; GCN-LABEL: {{^}}scratch_buffer_known_high_bit_small:
+; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo16:
+; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
+; GCN: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xfffc, [[FI]]
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
+define amdgpu_kernel void @scratch_buffer_known_high_masklo16() #0 {
+  %alloca = alloca i32, align 4, addrspace(5)
+  store volatile i32 0, i32 addrspace(5)* %alloca
+  %toint = ptrtoint i32 addrspace(5)* %alloca to i32
+  %masked = and i32 %toint, 65535
+  store volatile i32 %masked, i32 addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo17:
 ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
 ; GCN-NOT: [[FI]]
 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
-define amdgpu_kernel void @scratch_buffer_known_high_bit_small() #0 {
+define amdgpu_kernel void @scratch_buffer_known_high_masklo17() #0 {
   %alloca = alloca i32, align 4, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca
   %toint = ptrtoint i32 addrspace(5)* %alloca to i32
-  %masked = and i32 %toint, 2147483647
+  %masked = and i32 %toint, 131071
   store volatile i32 %masked, i32 addrspace(1)* undef
   ret void
 }
 
-; GCN-LABEL: {{^}}scratch_buffer_known_high_bit_huge:
+; GCN-LABEL: {{^}}scratch_buffer_known_high_mask18:
 ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
-; GCN-DAG: buffer_store_dword
-; GCN-DAG: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x7ffffffc, [[FI]]
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
-define amdgpu_kernel void @scratch_buffer_known_high_bit_huge() #1 {
+; GCN-NOT: [[FI]]
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
+define amdgpu_kernel void @scratch_buffer_known_high_mask18() #0 {
   %alloca = alloca i32, align 4, addrspace(5)
   store volatile i32 0, i32 addrspace(5)* %alloca
   %toint = ptrtoint i32 addrspace(5)* %alloca to i32
-  %masked = and i32 %toint, 2147483647
+  %masked = and i32 %toint, 262143
   store volatile i32 %masked, i32 addrspace(1)* undef
   ret void
 }
 
 attributes #0 = { nounwind }
-attributes #1 = { nounwind "target-features"="+huge-private-buffer" }
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2019-05-23 19:38:14 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2019-05-23 19:38:14 +0000
commit	5c714cbdd83166e10b27b8e5ea2700654da2e90b (patch)
tree	5f0da14a93817c94f6dcef9d2c2c2cbc9af41986 /llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
parent	e4b27869c60cb1311c4396e057c21573e19e62cd (diff)
download	bcm5719-llvm-5c714cbdd83166e10b27b8e5ea2700654da2e90b.tar.gz bcm5719-llvm-5c714cbdd83166e10b27b8e5ea2700654da2e90b.zip