AMDGPU: Remove SIPrepareScratchRegs

The pass does not work because of emergency stack slots. This pass was supposed to eliminate dummy registers for the spill instructions, but the register scavenger can introduce more during PrologEpilogInserter, so some would end up left behind if they were needed. The potential for spilling the scratch resource descriptor and offset register makes doing something like this overly complicated. Reserve registers to use for the resource descriptor and use them directly in eliminateFrameIndex. This also stops creating another scratch resource descriptor when directly selecting scratch MUBUF instructions. The choice of which registers are reserved is temporary. For now it attempts to pick the next available registers after the user and system SGPRs. llvm-svn: 254329
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2015-11-30 21:15:53 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2015-11-30 21:15:53 +0000
commit: 0e3d38937e11408127c1ae07d89f5189df204405 (patch)
tree: cd773950ee36f9811b2a4c50a390a46398f26272 /llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
parent: ff6da2fe894f52abcddd7bfbf2a211d51867eb88 (diff)
download: bcm5719-llvm-0e3d38937e11408127c1ae07d89f5189df204405.tar.gz
bcm5719-llvm-0e3d38937e11408127c1ae07d89f5189df204405.zip
1 files changed, 42 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
new file mode 100644
index 00000000000..5e8cf5bb3d2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
+; XUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s
+; XUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s
+
+; FIXME: align on alloca seems to be ignored for private_segment_alignment
+
+; ALL-LABEL: {{^}}large_alloca_compute_shader:
+
+; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCN: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCN: s_mov_b32 s14, -1
+; CI: s_mov_b32 s15, 0x80f000
+; VI: s_mov_b32 s15, 0x800000
+
+
+; GCNHSA: .amd_kernel_code_t
+; GCNHSA: private_segment_alignment = 4
+; GCNHSA: .end_amd_kernel_code_t
+
+; GCNHSA: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GCNHSA: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GCNHSA: s_mov_b32 s10, -1
+; CIHSA: s_mov_b32 s11, 0x180f000
+; VIHSA: s_mov_b32 s11, 0x11800000
+
+; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s6 offen
+; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s6 offen
+
+; Scratch size = alloca size + emergency stack slot
+; ALL: ; ScratchSize: 32772
+define void @large_alloca_compute_shader(i32 %x, i32 %y) #0 { ; stores %x to the last array slot, reloads slot %y, and writes that value out
+  %large = alloca [8192 x i32], align 4 ; 8192 x i32 = 32768 bytes; plus the 4-byte emergency slot this is the 32772 ScratchSize expected above
+  %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 ; address of the last element (index 8191)
+  store volatile i32 %x, i32* %gep ; volatile store of %x into that last element
+  %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y ; address of element %y, an index supplied as a function argument
+  %val = load volatile i32, i32* %gep1 ; volatile load back from the array at that index
+  store volatile i32 %val, i32 addrspace(1)* undef ; volatile store of the loaded value through an undef addrspace(1) pointer; presumably only there to give the load a use
+  ret void
+}
+
+attributes #0 = { nounwind  } ; attribute group #0, referenced by the function definition above
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2015-11-30 21:15:53 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2015-11-30 21:15:53 +0000
commit	0e3d38937e11408127c1ae07d89f5189df204405 (patch)
tree	cd773950ee36f9811b2a4c50a390a46398f26272 /llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
parent	ff6da2fe894f52abcddd7bfbf2a211d51867eb88 (diff)
download	bcm5719-llvm-0e3d38937e11408127c1ae07d89f5189df204405.tar.gz bcm5719-llvm-0e3d38937e11408127c1ae07d89f5189df204405.zip