diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-05-24 18:18:51 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-05-24 18:18:51 +0000 |
| commit | 3d59e388ca252615beb573768015d32526fd1d56 (patch) | |
| tree | 564d416539423a35d470582ce1a05c7d56a7fd13 /llvm/test/CodeGen | |
| parent | 21efe2afed7b743f37780f39b090af6145b4d527 (diff) | |
| download | bcm5719-llvm-3d59e388ca252615beb573768015d32526fd1d56.tar.gz bcm5719-llvm-3d59e388ca252615beb573768015d32526fd1d56.zip | |
AMDGPU: Activate all lanes when spilling CSR VGPR for SGPR spills
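If some lanes weren't active on entry to the function, spilling SGPRs
into the callee-saved VGPR could clobber those lanes' VGPR values.
The VGPR is now saved and restored with all lanes enabled (exec
temporarily set to -1).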
llvm-svn: 361655
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll | 26 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll | 34 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/nested-calls.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/sibling-call.ll | 13 |
7 files changed, 69 insertions, 28 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll index c4b2561a8f2..cd1ce13eb16 100644 --- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -30,11 +30,11 @@ entry: ; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf: ; GCN: s_mov_b32 s5, s32 +; GCN: s_add_u32 s32, s32, 0xc00{{$}} ; GCN-DAG: buffer_store_dword v32 ; GCN-DAG: buffer_store_dword v33 ; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32 ; GCN-DAG: v_writelane_b32 -; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}} ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}} ; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]] ; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll index ee18d322914..2fef190f8cc 100644 --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -38,8 +38,8 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_ ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: s_swappc_b64 -; GCN-NEXT: s_mov_b32 s5, s33 -; GCN: v_readlane_b32 s37, v32, 4 +; GCN-DAG: s_mov_b32 s5, s33 +; GCN-DAG: v_readlane_b32 s37, v32, 4 ; GCN: v_readlane_b32 s36, v32, 3 ; GCN: v_readlane_b32 s35, v32, 2 ; GCN: v_readlane_b32 s34, v32, 1 @@ -59,7 +59,7 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa ; GCN-NEXT: s_mov_b32 s5, s33 ; GCN-NEXT: s_mov_b32 s33, s5 ; GCN-NEXT: s_swappc_b64 -; GCN-NEXT: s_mov_b32 s5, s33 +; GCN: s_mov_b32 s5, s33 define void @test_func_call_external_void_funcx2() #0 { call void @external_void_func_void() call void @external_void_func_void() @@ -175,7 +175,7 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_v32(i32 addrspace( ; GCN-NEXT: ; clobber ; GCN-NEXT: #ASMEND ; GCN-NEXT: 
v_readlane_b32 s33, v0, 0 -; GCN-NEXT: s_setpc_b64 +; GCN: s_setpc_b64 define hidden void @void_func_void_clobber_s33() #2 { call void asm sideeffect "; clobber", "~{s33}"() #0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index daec4930e67..ebd6f96a5b8 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -37,19 +37,19 @@ define void @callee_with_stack() #0 { ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt ; GCN: s_mov_b32 s5, s32 +; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}} ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; GCN-DAG: v_writelane_b32 v32, s33, ; GCN-DAG: v_writelane_b32 v32, s34, ; GCN-DAG: v_writelane_b32 v32, s35, -; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}} ; GCN-DAG: v_mov_b32_e32 v0, 0{{$}} ; GCN-DAG: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}} ; GCN-DAG: s_mov_b32 s33, s5 ; GCN: s_swappc_b64 -; GCN: s_mov_b32 s5, s33 +; GCN-DAG: s_mov_b32 s5, s33 ; GCN-DAG: v_readlane_b32 s35, ; GCN-DAG: v_readlane_b32 s34, ; GCN-DAG: v_readlane_b32 s33, @@ -72,7 +72,9 @@ define void @callee_with_stack_and_call() #0 { ; GCN-LABEL: {{^}}callee_no_stack_with_call: ; GCN: s_waitcnt ; GCN: s_mov_b32 s5, s32 -; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 +; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 +; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-DAG: v_writelane_b32 v32, s33, 0 ; GCN-DAG: v_writelane_b32 v32, s34, 1 ; GCN: s_mov_b32 s33, s5 @@ -81,9 +83,12 @@ define void @callee_with_stack_and_call() #0 { ; GCN-DAG: v_readlane_b32 s34, v32, 1 ; GCN-DAG: v_readlane_b32 s33, v32, 0 -; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 -; GCN: s_sub_u32 s32, s32, 0x400 +; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 +; GCN-NEXT: s_mov_b64 exec, 
[[COPY_EXEC1]] + +; GCN: s_sub_u32 s32, s32, 0x400 ; GCN: s_setpc_b64 define void @callee_no_stack_with_call() #0 { call void @external_void_func_void() @@ -94,11 +99,18 @@ declare void @external_void_func_void() #0 ; Make sure if a CSR vgpr is used for SGPR spilling, it is saved and restored ; GCN-LABEL: {{^}}callee_func_sgpr_spill_no_calls: -; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill +; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] + ; GCN: v_writelane_b32 v32 ; GCN: ;;#ASMSTART ; GCN: v_readlane_b32 s{{[0-9]+}}, v32 -; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload + +; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] + ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll index 750a0203c9b..c63d96917d9 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -326,8 +326,8 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 { ; Requires loading and storing to stack slot. 
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x: -; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill ; GCN: s_add_u32 s32, s32, 0x400{{$}} +; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll index 10573461b37..4b38fb8e68d 100644 --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -28,10 +28,12 @@ define float @call_split_type_used_outside_block_v2f32() #0 { ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s5, s32 +; GCN-NEXT: s_add_u32 s32, s32, 0x400 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v32, s33, 0 ; GCN-NEXT: v_writelane_b32 v32, s34, 1 -; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: v_writelane_b32 v32, s35, 2 ; GCN-NEXT: s_getpc_b64 s[6:7] ; GCN-NEXT: s_add_u32 s6, s6, func_v2f32@rel32@lo+4 @@ -39,12 +41,14 @@ define float @call_split_type_used_outside_block_v2f32() #0 { ; GCN-NEXT: s_mov_b64 s[34:35], s[30:31] ; GCN-NEXT: s_mov_b32 s33, s5 ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GCN-NEXT: s_mov_b32 s5, s33 ; GCN-NEXT: s_mov_b64 s[30:31], s[34:35] ; GCN-NEXT: v_readlane_b32 s35, v32, 2 +; GCN-NEXT: s_mov_b32 s5, s33 ; GCN-NEXT: v_readlane_b32 s34, v32, 1 ; GCN-NEXT: v_readlane_b32 s33, v32, 0 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] @@ 
-62,10 +66,12 @@ define float @call_split_type_used_outside_block_v3f32() #0 { ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s5, s32 +; GCN-NEXT: s_add_u32 s32, s32, 0x400 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v32, s33, 0 ; GCN-NEXT: v_writelane_b32 v32, s34, 1 -; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: v_writelane_b32 v32, s35, 2 ; GCN-NEXT: s_getpc_b64 s[6:7] ; GCN-NEXT: s_add_u32 s6, s6, func_v3f32@rel32@lo+4 @@ -73,12 +79,14 @@ define float @call_split_type_used_outside_block_v3f32() #0 { ; GCN-NEXT: s_mov_b64 s[34:35], s[30:31] ; GCN-NEXT: s_mov_b32 s33, s5 ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GCN-NEXT: s_mov_b32 s5, s33 ; GCN-NEXT: s_mov_b64 s[30:31], s[34:35] ; GCN-NEXT: v_readlane_b32 s35, v32, 2 +; GCN-NEXT: s_mov_b32 s5, s33 ; GCN-NEXT: v_readlane_b32 s34, v32, 1 ; GCN-NEXT: v_readlane_b32 s33, v32, 0 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -96,10 +104,12 @@ define half @call_split_type_used_outside_block_v4f16() #0 { ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s5, s32 +; GCN-NEXT: s_add_u32 s32, s32, 0x400 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v32, s33, 0 ; GCN-NEXT: v_writelane_b32 v32, s34, 1 -; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: v_writelane_b32 v32, s35, 2 ; GCN-NEXT: s_getpc_b64 s[6:7] ; GCN-NEXT: s_add_u32 s6, s6, func_v4f16@rel32@lo+4 @@ -107,12 +117,14 @@ define half 
@call_split_type_used_outside_block_v4f16() #0 { ; GCN-NEXT: s_mov_b64 s[34:35], s[30:31] ; GCN-NEXT: s_mov_b32 s33, s5 ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GCN-NEXT: s_mov_b32 s5, s33 ; GCN-NEXT: s_mov_b64 s[30:31], s[34:35] ; GCN-NEXT: v_readlane_b32 s35, v32, 2 +; GCN-NEXT: s_mov_b32 s5, s33 ; GCN-NEXT: v_readlane_b32 s34, v32, 1 ; GCN-NEXT: v_readlane_b32 s33, v32, 0 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -130,10 +142,12 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 { ; GCN: ; %bb.0: ; %bb0 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: s_mov_b32 s5, s32 +; GCN-NEXT: s_add_u32 s32, s32, 0x400 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_writelane_b32 v32, s33, 0 ; GCN-NEXT: v_writelane_b32 v32, s34, 1 -; GCN-NEXT: s_add_u32 s32, s32, 0x400 ; GCN-NEXT: v_writelane_b32 v32, s35, 2 ; GCN-NEXT: s_getpc_b64 s[6:7] ; GCN-NEXT: s_add_u32 s6, s6, func_struct@rel32@lo+4 @@ -141,13 +155,15 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 { ; GCN-NEXT: s_mov_b64 s[34:35], s[30:31] ; GCN-NEXT: s_mov_b32 s33, s5 ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GCN-NEXT: s_mov_b32 s5, s33 ; GCN-NEXT: s_mov_b64 s[30:31], s[34:35] ; GCN-NEXT: v_readlane_b32 s35, v32, 2 +; GCN-NEXT: s_mov_b32 s5, s33 ; GCN-NEXT: v_readlane_b32 s34, v32, 1 +; GCN-NEXT: v_mov_b32_e32 v1, v4 ; GCN-NEXT: v_readlane_b32 s33, v32, 0 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: v_mov_b32_e32 v1, v4 +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 ; GCN-NEXT: 
s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll index 7fbcb9706a8..66e6988fbe6 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -10,9 +10,12 @@ declare void @external_void_func_i32(i32) #0 ; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm: ; GCN: s_waitcnt ; GCN: s_mov_b32 s5, s32 -; Spill CSR VGPR used for SGPR spilling -; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; GCN-DAG: s_add_u32 s32, s32, 0x400 +; Spill CSR VGPR used for SGPR spilling +; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 +; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] + ; GCN-DAG: v_writelane_b32 v32, s33, 0 ; GCN-DAG: v_writelane_b32 v32, s34, 1 ; GCN-DAG: v_writelane_b32 v32, s35, 2 @@ -22,7 +25,10 @@ declare void @external_void_func_i32(i32) #0 ; GCN: v_readlane_b32 s35, v32, 2 ; GCN: v_readlane_b32 s34, v32, 1 ; GCN: v_readlane_b32 s33, v32, 0 -; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 +; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} +; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 +; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] + ; GCN: s_sub_u32 s32, s32, 0x400 ; GCN: s_setpc_b64 define void @test_func_call_external_void_func_i32_imm() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index 1881b526dcd..ba0acbc2573 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -207,13 +207,17 @@ entry: ; Have another non-tail in the function ; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_other_call: ; GCN: s_mov_b32 s5, s32 -; GCN: buffer_store_dword v34, off, s[0:3], s5 offset:12 +; GCN: s_add_u32 s32, s32, 0x400 + +; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 +; GCN-NEXT: buffer_store_dword v34, 
off, s[0:3], s5 offset:12 +; GCN-NEXT: s_mov_b64 exec + ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill ; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill ; GCN-DAG: v_writelane_b32 v34, s33, 0 ; GCN-DAG: v_writelane_b32 v34, s34, 1 ; GCN-DAG: v_writelane_b32 v34, s35, 2 -; GCN-DAG: s_add_u32 s32, s32, 0x400 ; GCN-DAG: s_getpc_b64 ; GCN: s_swappc_b64 @@ -228,7 +232,10 @@ entry: ; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:4 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 -; GCN: buffer_load_dword v34, off, s[0:3], s5 offset:12 +; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 +; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s5 offset:12 +; GCN-NEXT: s_mov_b64 exec + ; GCN: s_sub_u32 s32, s32, 0x400 ; GCN: s_setpc_b64 s[6:7] define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 { |

