summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-05-24 18:18:51 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-05-24 18:18:51 +0000
commit: 3d59e388ca252615beb573768015d32526fd1d56 (patch)
tree: 564d416539423a35d470582ce1a05c7d56a7fd13 /llvm/test/CodeGen
parent: 21efe2afed7b743f37780f39b090af6145b4d527 (diff)
download: bcm5719-llvm-3d59e388ca252615beb573768015d32526fd1d56.tar.gz
          bcm5719-llvm-3d59e388ca252615beb573768015d32526fd1d56.zip
AMDGPU: Activate all lanes when spilling CSR VGPR for SGPR spills

If some lanes weren't active on entry to the function, this could clobber their VGPR values.

llvm-svn: 361655
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll 8
-rw-r--r-- llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll 26
-rw-r--r-- llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll 34
-rw-r--r-- llvm/test/CodeGen/AMDGPU/nested-calls.ll 12
-rw-r--r-- llvm/test/CodeGen/AMDGPU/sibling-call.ll 13
7 files changed, 69 insertions, 28 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
index c4b2561a8f2..cd1ce13eb16 100644
--- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
@@ -30,11 +30,11 @@ entry:
; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf:
; GCN: s_mov_b32 s5, s32
+; GCN: s_add_u32 s32, s32, 0xc00{{$}}
; GCN-DAG: buffer_store_dword v32
; GCN-DAG: buffer_store_dword v33
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
; GCN-DAG: v_writelane_b32
-; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
index ee18d322914..2fef190f8cc 100644
--- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -38,8 +38,8 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_swappc_b64
-; GCN-NEXT: s_mov_b32 s5, s33
-; GCN: v_readlane_b32 s37, v32, 4
+; GCN-DAG: s_mov_b32 s5, s33
+; GCN-DAG: v_readlane_b32 s37, v32, 4
; GCN: v_readlane_b32 s36, v32, 3
; GCN: v_readlane_b32 s35, v32, 2
; GCN: v_readlane_b32 s34, v32, 1
@@ -59,7 +59,7 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa
; GCN-NEXT: s_mov_b32 s5, s33
; GCN-NEXT: s_mov_b32 s33, s5
; GCN-NEXT: s_swappc_b64
-; GCN-NEXT: s_mov_b32 s5, s33
+; GCN: s_mov_b32 s5, s33
define void @test_func_call_external_void_funcx2() #0 {
call void @external_void_func_void()
call void @external_void_func_void()
@@ -175,7 +175,7 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_v32(i32 addrspace(
; GCN-NEXT: ; clobber
; GCN-NEXT: #ASMEND
; GCN-NEXT: v_readlane_b32 s33, v0, 0
-; GCN-NEXT: s_setpc_b64
+; GCN: s_setpc_b64
define hidden void @void_func_void_clobber_s33() #2 {
call void asm sideeffect "; clobber", "~{s33}"() #0
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index daec4930e67..ebd6f96a5b8 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -37,19 +37,19 @@ define void @callee_with_stack() #0 {
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; GCN: s_mov_b32 s5, s32
+; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8
; GCN-DAG: v_writelane_b32 v32, s33,
; GCN-DAG: v_writelane_b32 v32, s34,
; GCN-DAG: v_writelane_b32 v32, s35,
-; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
; GCN-DAG: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
; GCN-DAG: s_mov_b32 s33, s5
; GCN: s_swappc_b64
-; GCN: s_mov_b32 s5, s33
+; GCN-DAG: s_mov_b32 s5, s33
; GCN-DAG: v_readlane_b32 s35,
; GCN-DAG: v_readlane_b32 s34,
; GCN-DAG: v_readlane_b32 s33,
@@ -72,7 +72,9 @@ define void @callee_with_stack_and_call() #0 {
; GCN-LABEL: {{^}}callee_no_stack_with_call:
; GCN: s_waitcnt
; GCN: s_mov_b32 s5, s32
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
+; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-DAG: v_writelane_b32 v32, s33, 0
; GCN-DAG: v_writelane_b32 v32, s34, 1
; GCN: s_mov_b32 s33, s5
@@ -81,9 +83,12 @@ define void @callee_with_stack_and_call() #0 {
; GCN-DAG: v_readlane_b32 s34, v32, 1
; GCN-DAG: v_readlane_b32 s33, v32, 0
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
-; GCN: s_sub_u32 s32, s32, 0x400
+; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
+
+; GCN: s_sub_u32 s32, s32, 0x400
; GCN: s_setpc_b64
define void @callee_no_stack_with_call() #0 {
call void @external_void_func_void()
@@ -94,11 +99,18 @@ declare void @external_void_func_void() #0
; Make sure if a CSR vgpr is used for SGPR spilling, it is saved and restored
; GCN-LABEL: {{^}}callee_func_sgpr_spill_no_calls:
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
+
; GCN: v_writelane_b32 v32
; GCN: ;;#ASMSTART
; GCN: v_readlane_b32 s{{[0-9]+}}, v32
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
+
+; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
+
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index 750a0203c9b..c63d96917d9 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -326,8 +326,8 @@ define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
; Requires loading and storing to stack slot.
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
; GCN: s_add_u32 s32, s32, 0x400{{$}}
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
index 10573461b37..4b38fb8e68d 100644
--- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -28,10 +28,12 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s5, s32
+; GCN-NEXT: s_add_u32 s32, s32, 0x400
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: v_writelane_b32 v32, s33, 0
; GCN-NEXT: v_writelane_b32 v32, s34, 1
-; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: v_writelane_b32 v32, s35, 2
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, func_v2f32@rel32@lo+4
@@ -39,12 +41,14 @@ define float @call_split_type_used_outside_block_v2f32() #0 {
; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
; GCN-NEXT: s_mov_b32 s33, s5
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT: s_mov_b32 s5, s33
; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
; GCN-NEXT: v_readlane_b32 s35, v32, 2
+; GCN-NEXT: s_mov_b32 s5, s33
; GCN-NEXT: v_readlane_b32 s34, v32, 1
; GCN-NEXT: v_readlane_b32 s33, v32, 0
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -62,10 +66,12 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s5, s32
+; GCN-NEXT: s_add_u32 s32, s32, 0x400
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: v_writelane_b32 v32, s33, 0
; GCN-NEXT: v_writelane_b32 v32, s34, 1
-; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: v_writelane_b32 v32, s35, 2
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, func_v3f32@rel32@lo+4
@@ -73,12 +79,14 @@ define float @call_split_type_used_outside_block_v3f32() #0 {
; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
; GCN-NEXT: s_mov_b32 s33, s5
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT: s_mov_b32 s5, s33
; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
; GCN-NEXT: v_readlane_b32 s35, v32, 2
+; GCN-NEXT: s_mov_b32 s5, s33
; GCN-NEXT: v_readlane_b32 s34, v32, 1
; GCN-NEXT: v_readlane_b32 s33, v32, 0
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -96,10 +104,12 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s5, s32
+; GCN-NEXT: s_add_u32 s32, s32, 0x400
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: v_writelane_b32 v32, s33, 0
; GCN-NEXT: v_writelane_b32 v32, s34, 1
-; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: v_writelane_b32 v32, s35, 2
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, func_v4f16@rel32@lo+4
@@ -107,12 +117,14 @@ define half @call_split_type_used_outside_block_v4f16() #0 {
; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
; GCN-NEXT: s_mov_b32 s33, s5
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT: s_mov_b32 s5, s33
; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
; GCN-NEXT: v_readlane_b32 s35, v32, 2
+; GCN-NEXT: s_mov_b32 s5, s33
; GCN-NEXT: v_readlane_b32 s34, v32, 1
; GCN-NEXT: v_readlane_b32 s33, v32, 0
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -130,10 +142,12 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
; GCN: ; %bb.0: ; %bb0
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s5, s32
+; GCN-NEXT: s_add_u32 s32, s32, 0x400
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: v_writelane_b32 v32, s33, 0
; GCN-NEXT: v_writelane_b32 v32, s34, 1
-; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: v_writelane_b32 v32, s35, 2
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, func_struct@rel32@lo+4
@@ -141,13 +155,15 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 {
; GCN-NEXT: s_mov_b64 s[34:35], s[30:31]
; GCN-NEXT: s_mov_b32 s33, s5
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
-; GCN-NEXT: s_mov_b32 s5, s33
; GCN-NEXT: s_mov_b64 s[30:31], s[34:35]
; GCN-NEXT: v_readlane_b32 s35, v32, 2
+; GCN-NEXT: s_mov_b32 s5, s33
; GCN-NEXT: v_readlane_b32 s34, v32, 1
+; GCN-NEXT: v_mov_b32_e32 v1, v4
; GCN-NEXT: v_readlane_b32 s33, v32, 0
+; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
-; GCN-NEXT: v_mov_b32_e32 v1, v4
+; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
index 7fbcb9706a8..66e6988fbe6 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
@@ -10,9 +10,12 @@ declare void @external_void_func_i32(i32) #0
; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm:
; GCN: s_waitcnt
; GCN: s_mov_b32 s5, s32
-; Spill CSR VGPR used for SGPR spilling
-; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
; GCN-DAG: s_add_u32 s32, s32, 0x400
+; Spill CSR VGPR used for SGPR spilling
+; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s5 offset:4
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
+
; GCN-DAG: v_writelane_b32 v32, s33, 0
; GCN-DAG: v_writelane_b32 v32, s34, 1
; GCN-DAG: v_writelane_b32 v32, s35, 2
@@ -22,7 +25,10 @@ declare void @external_void_func_i32(i32) #0
; GCN: v_readlane_b32 s35, v32, 2
; GCN: v_readlane_b32 s34, v32, 1
; GCN: v_readlane_b32 s33, v32, 0
-; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
+; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
+; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s5 offset:4
+; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
+
; GCN: s_sub_u32 s32, s32, 0x400
; GCN: s_setpc_b64
define void @test_func_call_external_void_func_i32_imm() #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 1881b526dcd..ba0acbc2573 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -207,13 +207,17 @@ entry:
; Have another non-tail in the function
; GCN-LABEL: {{^}}sibling_call_i32_fastcc_i32_i32_other_call:
; GCN: s_mov_b32 s5, s32
-; GCN: buffer_store_dword v34, off, s[0:3], s5 offset:12
+; GCN: s_add_u32 s32, s32, 0x400
+
+; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
+; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s5 offset:12
+; GCN-NEXT: s_mov_b64 exec
+
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
; GCN: buffer_store_dword v33, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
; GCN-DAG: v_writelane_b32 v34, s33, 0
; GCN-DAG: v_writelane_b32 v34, s34, 1
; GCN-DAG: v_writelane_b32 v34, s35, 2
-; GCN-DAG: s_add_u32 s32, s32, 0x400
; GCN-DAG: s_getpc_b64
; GCN: s_swappc_b64
@@ -228,7 +232,10 @@ entry:
; GCN: buffer_load_dword v33, off, s[0:3], s5 offset:4
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
-; GCN: buffer_load_dword v34, off, s[0:3], s5 offset:12
+; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
+; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s5 offset:12
+; GCN-NEXT: s_mov_b64 exec
+
; GCN: s_sub_u32 s32, s32, 0x400
; GCN: s_setpc_b64 s[6:7]
define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 {
OpenPOWER on IntegriCloud