diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-06-20 21:58:24 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-06-20 21:58:24 +0000 |
| commit | d88db6d7fc942947ad4a068b38c5b5af7d5d1751 (patch) | |
| tree | 670dc13f6ccde2424eded38229f3b60061f5e6a2 /llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll | |
| parent | 25f08a17c318e8ffbd30ecbab4d3ea5c5105ddbc (diff) | |
| download | bcm5719-llvm-d88db6d7fc942947ad4a068b38c5b5af7d5d1751.tar.gz bcm5719-llvm-d88db6d7fc942947ad4a068b38c5b5af7d5d1751.zip | |
AMDGPU: Always use s33 for global scratch wave offset
Every called function could possibly need this to calculate the
absolute address of stack objects, and this avoids inserting a copy
around every call site in the kernel. It's also somewhat cleaner to
keep this in a callee saved SGPR.
llvm-svn: 363990
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll | 24 |
1 files changed, 13 insertions, 11 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index 69c8b53f55c..2ad3fd626a9 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -75,19 +75,19 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 { ; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}} ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 -; GCN-DAG: v_writelane_b32 v32, s33, ; GCN-DAG: v_writelane_b32 v32, s34, ; GCN-DAG: v_writelane_b32 v32, s35, +; GCN-DAG: v_writelane_b32 v32, s36, ; GCN-DAG: v_mov_b32_e32 v0, 0{{$}} ; GCN-DAG: buffer_store_dword v0, off, s[0:3], s5{{$}} -; GCN-DAG: s_mov_b32 s33, s5 +; GCN-DAG: s_mov_b32 [[COPY_FP:s[0-9]+]], s5 ; GCN: s_swappc_b64 -; GCN-DAG: s_mov_b32 s5, s33 -; GCN-DAG: v_readlane_b32 s35, +; GCN-DAG: s_mov_b32 s5, [[COPY_FP]] ; GCN-DAG: v_readlane_b32 s34, -; GCN-DAG: v_readlane_b32 s33, +; GCN-DAG: v_readlane_b32 s35, +; GCN-DAG: v_readlane_b32 s36, ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; GCN: s_waitcnt ; GCN-NEXT: s_setpc_b64 @@ -110,14 +110,16 @@ define void @callee_with_stack_and_call() #0 { ; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-DAG: v_writelane_b32 v32, s33, 0 -; GCN-DAG: v_writelane_b32 v32, s34, 1 -; GCN: s_mov_b32 s33, s5 +; GCN-DAG: v_writelane_b32 v32, s34, 0 +; GCN-DAG: v_writelane_b32 v32, s35, 1 +; GCN-DAG: v_writelane_b32 v32, s36, 2 +; GCN-DAG: s_mov_b32 [[COPY_FP:s[0-9]+]], s5 ; GCN: s_swappc_b64 -; GCN: s_mov_b32 s5, s33 +; GCN: s_mov_b32 s5, [[COPY_FP]] -; GCN-DAG: v_readlane_b32 s34, v32, 1 -; GCN-DAG: v_readlane_b32 s33, v32, 0 +; GCN-DAG: v_readlane_b32 s34, v32, 0 +; GCN-DAG: v_readlane_b32 s35, v32, 1 +; GCN-DAG: v_readlane_b32 s36, v32, 2 ; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; GCN-NEXT: buffer_load_dword v32, off, 
s[0:3], s5 ; 4-byte Folded Reload |

