summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-05-23 19:38:14 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-05-23 19:38:14 +0000
commit: 5c714cbdd83166e10b27b8e5ea2700654da2e90b (patch)
tree: 5f0da14a93817c94f6dcef9d2c2c2cbc9af41986 /llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
parent: e4b27869c60cb1311c4396e057c21573e19e62cd (diff)
download: bcm5719-llvm-5c714cbdd83166e10b27b8e5ea2700654da2e90b.tar.gz
download: bcm5719-llvm-5c714cbdd83166e10b27b8e5ea2700654da2e90b.zip
AMDGPU: Correct maximum possible private allocation size
We were assuming a much larger possible per-wave visible stack allocation than is possible:

https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/faa3ae51388517353afcdaf9c16621f879ef0a59/src/core/runtime/amd_gpu_agent.cpp#L70

Based on this, we can assume the high 15 bits of a frame index or sret are 0. The frame index value is the per-lane offset, so the maximum frame index value is MAX_WAVE_SCRATCH / wavesize.

Remove the corresponding subtarget feature and option that made this configurable.

llvm-svn: 361541
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll 31
1 files changed, 21 insertions, 10 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll b/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
index 8e54dcbd169..dfd75235f80 100644
--- a/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/huge-private-buffer.ll
@@ -1,31 +1,42 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; GCN-LABEL: {{^}}scratch_buffer_known_high_bit_small:
+; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo16:
+; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
+; GCN: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xfffc, [[FI]]
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
+define amdgpu_kernel void @scratch_buffer_known_high_masklo16() #0 {
+ %alloca = alloca i32, align 4, addrspace(5)
+ store volatile i32 0, i32 addrspace(5)* %alloca
+ %toint = ptrtoint i32 addrspace(5)* %alloca to i32
+ %masked = and i32 %toint, 65535
+ store volatile i32 %masked, i32 addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}scratch_buffer_known_high_masklo17:
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
; GCN-NOT: [[FI]]
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
-define amdgpu_kernel void @scratch_buffer_known_high_bit_small() #0 {
+define amdgpu_kernel void @scratch_buffer_known_high_masklo17() #0 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
%toint = ptrtoint i32 addrspace(5)* %alloca to i32
- %masked = and i32 %toint, 2147483647
+ %masked = and i32 %toint, 131071
store volatile i32 %masked, i32 addrspace(1)* undef
ret void
}
-; GCN-LABEL: {{^}}scratch_buffer_known_high_bit_huge:
+; GCN-LABEL: {{^}}scratch_buffer_known_high_mask18:
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4
-; GCN-DAG: buffer_store_dword
-; GCN-DAG: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x7ffffffc, [[FI]]
-; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
-define amdgpu_kernel void @scratch_buffer_known_high_bit_huge() #1 {
+; GCN-NOT: [[FI]]
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FI]]
+define amdgpu_kernel void @scratch_buffer_known_high_mask18() #0 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
%toint = ptrtoint i32 addrspace(5)* %alloca to i32
- %masked = and i32 %toint, 2147483647
+ %masked = and i32 %toint, 262143
store volatile i32 %masked, i32 addrspace(1)* undef
ret void
}
attributes #0 = { nounwind }
-attributes #1 = { nounwind "target-features"="+huge-private-buffer" }
OpenPOWER on IntegriCloud