diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-05-23 19:38:14 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-05-23 19:38:14 +0000 |
| commit | 5c714cbdd83166e10b27b8e5ea2700654da2e90b (patch) | |
| tree | 5f0da14a93817c94f6dcef9d2c2c2cbc9af41986 /llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll | |
| parent | e4b27869c60cb1311c4396e057c21573e19e62cd (diff) | |
| download | bcm5719-llvm-5c714cbdd83166e10b27b8e5ea2700654da2e90b.tar.gz bcm5719-llvm-5c714cbdd83166e10b27b8e5ea2700654da2e90b.zip | |
AMDGPU: Correct maximum possible private allocation size
We were assuming a much larger per-wave visible stack
allocation than is actually possible:
https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/faa3ae51388517353afcdaf9c16621f879ef0a59/src/core/runtime/amd_gpu_agent.cpp#L70
Based on this, we can assume the high 15 bits of a frame index or sret
are 0. The frame index value is the per-lane offset, so the maximum
frame index value is MAX_WAVE_SCRATCH / wavesize.
Remove the corresponding subtarget feature and option that made
this configurable.
llvm-svn: 361541
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll | 31 |
1 file changed, 21 insertions, 10 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll b/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll index 8e54dcbd169..dfd75235f80 100644 --- a/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll +++ b/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll @@ -1,31 +1,42 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; GCN-LABEL: {{^}}scratch_buffer_known_high_bit_small: +; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo16: +; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4 +; GCN: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xfffc, [[FI]] +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] +define amdgpu_kernel void @scratch_buffer_known_high_masklo16() #0 { + %alloca = alloca i32, align 4, addrspace(5) + store volatile i32 0, i32 addrspace(5)* %alloca + %toint = ptrtoint i32 addrspace(5)* %alloca to i32 + %masked = and i32 %toint, 65535 + store volatile i32 %masked, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo17: ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4 ; GCN-NOT: [[FI]] ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]] -define amdgpu_kernel void @scratch_buffer_known_high_bit_small() #0 { +define amdgpu_kernel void @scratch_buffer_known_high_masklo17() #0 { %alloca = alloca i32, align 4, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca %toint = ptrtoint i32 addrspace(5)* %alloca to i32 - %masked = and i32 %toint, 2147483647 + %masked = and i32 %toint, 131071 store volatile i32 %masked, i32 addrspace(1)* undef ret void } -; GCN-LABEL: {{^}}scratch_buffer_known_high_bit_huge: +; GCN-LABEL: {{^}}scratch_buffer_known_high_mask18: ; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4 -; GCN-DAG: buffer_store_dword -; GCN-DAG: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x7ffffffc, [[FI]] -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] -define amdgpu_kernel void @scratch_buffer_known_high_bit_huge() #1 { +; GCN-NOT: [[FI]] +; GCN: flat_store_dword 
v{{\[[0-9]+:[0-9]+\]}}, [[FI]] +define amdgpu_kernel void @scratch_buffer_known_high_mask18() #0 { %alloca = alloca i32, align 4, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca %toint = ptrtoint i32 addrspace(5)* %alloca to i32 - %masked = and i32 %toint, 2147483647 + %masked = and i32 %toint, 262143 store volatile i32 %masked, i32 addrspace(1)* undef ret void } attributes #0 = { nounwind } -attributes #1 = { nounwind "target-features"="+huge-private-buffer" } |

