diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-11-30 21:15:53 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-11-30 21:15:53 +0000 |
commit | 0e3d38937e11408127c1ae07d89f5189df204405 (patch) | |
tree | cd773950ee36f9811b2a4c50a390a46398f26272 /llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll | |
parent | ff6da2fe894f52abcddd7bfbf2a211d51867eb88 (diff) | |
download | bcm5719-llvm-0e3d38937e11408127c1ae07d89f5189df204405.tar.gz bcm5719-llvm-0e3d38937e11408127c1ae07d89f5189df204405.zip |
AMDGPU: Remove SIPrepareScratchRegs
It does not work because of emergency stack slots.
This pass was supposed to eliminate dummy registers for the
spill instructions, but the register scavenger can introduce
more during PrologEpilogInserter, so some would end up
left behind if they were needed.
The potential for spilling the scratch resource descriptor
and offset register makes doing something like this
overly complicated. Reserve registers to use for the resource
descriptor and use them directly in eliminateFrameIndex.
Also stop creating another scratch resource descriptor
when directly selecting scratch MUBUF instructions.
The choice of which registers are reserved is temporary.
For now it attempts to pick the next available registers
after the user and system SGPRs.
llvm-svn: 254329
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll | 42 |
1 file changed, 42 insertions, 0 deletions
```diff
diff --git a/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
new file mode 100644
index 00000000000..5e8cf5bb3d2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
+; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
+; XUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s
+; XUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s
+
+; FIXME: align on alloca seems to be ignored for private_segment_alignment
+
+; ALL-LABEL: {{^}}large_alloca_compute_shader:
+
+; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCN: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCN: s_mov_b32 s14, -1
+; CI: s_mov_b32 s15, 0x80f000
+; VI: s_mov_b32 s15, 0x800000
+
+
+; GCNHSA: .amd_kernel_code_t
+; GCNHSA: private_segment_alignment = 4
+; GCNHSA: .end_amd_kernel_code_t
+
+; GCNHSA: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
+; GCNHSA: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
+; GCNHSA: s_mov_b32 s10, -1
+; CIHSA: s_mov_b32 s11, 0x180f000
+; VIHSA: s_mov_b32 s11, 0x11800000
+
+; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s6 offen
+; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s6 offen
+
+; Scratch size = alloca size + emergency stack slot
+; ALL: ; ScratchSize: 32772
+define void @large_alloca_compute_shader(i32 %x, i32 %y) #0 {
+  %large = alloca [8192 x i32], align 4
+  %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
+  store volatile i32 %x, i32* %gep
+  %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
+  %val = load volatile i32, i32* %gep1
+  store volatile i32 %val, i32 addrspace(1)* undef
+  ret void
+}
+
+attributes #0 = { nounwind }
```