summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2015-11-30 21:15:53 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2015-11-30 21:15:53 +0000
commit0e3d38937e11408127c1ae07d89f5189df204405 (patch)
treecd773950ee36f9811b2a4c50a390a46398f26272 /llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
parentff6da2fe894f52abcddd7bfbf2a211d51867eb88 (diff)
downloadbcm5719-llvm-0e3d38937e11408127c1ae07d89f5189df204405.tar.gz
bcm5719-llvm-0e3d38937e11408127c1ae07d89f5189df204405.zip
AMDGPU: Remove SIPrepareScratchRegs
It does not work because of emergency stack slots. This pass was supposed to eliminate dummy registers for the spill instructions, but the register scavenger can introduce more during PrologEpilogInserter, so some would end up left behind if they were needed. The potential for spilling the scratch resource descriptor and offset register makes doing something like this overly complicated. Reserve registers to use for the resource descriptor and use them directly in eliminateFrameIndex. Also removes creating another scratch resource descriptor when directly selecting scratch MUBUF instructions. The choice of which registers are reserved is temporary. For now it attempts to pick the next available registers after the user and system SGPRs. llvm-svn: 254329
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll42
1 files changed, 42 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
new file mode 100644
index 00000000000..5e8cf5bb3d2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
+; XUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s
+; XUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s
+
+; FIXME: align on alloca seems to be ignored for private_segment_alignment
+
+; ALL-LABEL: {{^}}large_alloca_compute_shader:
+
+; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCN: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCN: s_mov_b32 s14, -1
+; CI: s_mov_b32 s15, 0x80f000
+; VI: s_mov_b32 s15, 0x800000
+
+
+; GCNHSA: .amd_kernel_code_t
+; GCNHSA: private_segment_alignment = 4
+; GCNHSA: .end_amd_kernel_code_t
+
+; GCNHSA: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GCNHSA: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GCNHSA: s_mov_b32 s10, -1
+; CIHSA: s_mov_b32 s11, 0x180f000
+; VIHSA: s_mov_b32 s11, 0x11800000
+
+; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s6 offen
+; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s6 offen
+
+; Scratch size = alloca size (8192 * 4 = 32768 bytes) + 4-byte emergency stack slot
+; ALL: ; ScratchSize: 32772
+define void @large_alloca_compute_shader(i32 %x, i32 %y) #0 { ; test kernel with one large stack array, takes a value %x and an index %y
+ %large = alloca [8192 x i32], align 4 ; 8192 i32 elements, i.e. 32768 bytes of stack
+ %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 ; address of the last element (constant index)
+ store volatile i32 %x, i32* %gep ; volatile so the store into the alloca is not optimized away
+ %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y ; address of element %y (index only known at run time)
+ %val = load volatile i32, i32* %gep1 ; volatile read back from the alloca
+ store volatile i32 %val, i32 addrspace(1)* undef ; write the loaded value out through an undef address space 1 pointer
+ ret void
+}
+
+attributes #0 = { nounwind } ; attribute group #0, applied to large_alloca_compute_shader
OpenPOWER on IntegriCloud