Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll          | 12
-rw-r--r--  llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll  |  8
-rw-r--r--  llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll         | 23
-rw-r--r--  llvm/test/CodeGen/AMDGPU/nested-calls.ll               | 20
4 files changed, 45 insertions(+), 18 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
index 44648dfc3c2..6ae5aabb5ca 100644
--- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
@@ -30,10 +30,11 @@ entry:
; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf:
; GCN: s_mov_b32 s5, s32
-; GCN: buffer_store_dword v32
+; GCN-DAG: buffer_store_dword v32
+; GCN-DAG: buffer_store_dword v33
; GCN: v_writelane_b32
-; GCN-DAG: s_add_u32 s32, s32, 0xa00{{$}}
+; GCN-DAG: s_add_u32 s32, s32, 0xb00{{$}}
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
; GCN: v_add_i32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
@@ -48,7 +49,8 @@ entry:
; GCN: v_readlane_b32
; GCN: buffer_load_dword v32,
-; GCN: s_sub_u32 s32, s32, 0xa00{{$}}
+; GCN: buffer_load_dword v33,
+; GCN: s_sub_u32 s32, s32, 0xb00{{$}}
; GCN: s_setpc_b64
define void @void_func_byval_struct_non_leaf(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 {
entry:
@@ -67,7 +69,7 @@ entry:
; GCN-LABEL: {{^}}call_void_func_byval_struct_func:
; GCN: s_mov_b32 s5, s32
-; GCN: s_add_u32 s32, s32, 0xa00{{$}}
+; GCN: s_add_u32 s32, s32, 0xc00{{$}}
; GCN: v_writelane_b32
; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}}
@@ -103,7 +105,7 @@ entry:
; GCN: v_readlane_b32
-; GCN: s_sub_u32 s32, s32, 0xa00{{$}}
+; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @call_void_func_byval_struct_func() #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index 60616639ea8..7c39831e73d 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -146,7 +146,7 @@ define void @use_stack1() #1 {
}
; GCN-LABEL: {{^}}indirect_use_stack:
-; GCN: ScratchSize: 2120
+; GCN: ScratchSize: 2124
define void @indirect_use_stack() #1 {
%alloca = alloca [16 x i32], align 4
call void asm sideeffect "; use $0", "v"([16 x i32]* %alloca) #0
@@ -156,7 +156,7 @@ define void @indirect_use_stack() #1 {
; GCN-LABEL: {{^}}indirect_2_level_use_stack:
; GCN: is_dynamic_callstack = 0
-; GCN: ScratchSize: 2120
+; GCN: ScratchSize: 2124
define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
call void @indirect_use_stack()
ret void
@@ -199,7 +199,7 @@ define amdgpu_kernel void @usage_external_recurse() #0 {
}
; GCN-LABEL: {{^}}direct_recursion_use_stack:
-; GCN: ScratchSize: 2052
+; GCN: ScratchSize: 2056
define void @direct_recursion_use_stack(i32 %val) #2 {
%alloca = alloca [512 x i32], align 4
call void asm sideeffect "; use $0", "v"([512 x i32]* %alloca) #0
@@ -218,7 +218,7 @@ ret:
; GCN-LABEL: {{^}}usage_direct_recursion:
; GCN: is_ptr64 = 1
; GCN: is_dynamic_callstack = 1
-; GCN: workitem_private_segment_byte_size = 2052
+; GCN: workitem_private_segment_byte_size = 2056
define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
call void @direct_recursion_use_stack(i32 %n)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index a07199c1a09..d0edcf8fcbb 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -36,14 +36,15 @@ define void @callee_with_stack() #0 {
; GCN-LABEL: {{^}}callee_with_stack_and_call:
; GCN: ; BB#0:
; GCN-NEXT: s_waitcnt
+; GCN: s_mov_b32 s5, s32
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8
-; GCN-DAG: s_mov_b32 s5, s32
; GCN-DAG: v_writelane_b32 v32, s33,
; GCN-DAG: v_writelane_b32 v32, s34,
; GCN-DAG: v_writelane_b32 v32, s35,
-; GCN-DAG: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
-; GCN-DAG: s_add_u32 s32, s32, 0x200{{$}}
+; GCN-DAG: s_add_u32 s32, s32, 0x300{{$}}
; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
+; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
; GCN-DAG: s_mov_b32 s33, s5
@@ -52,6 +53,7 @@ define void @callee_with_stack() #0 {
; GCN-DAG: v_readlane_b32 s35,
; GCN-DAG: v_readlane_b32 s34,
; GCN-DAG: v_readlane_b32 s33,
+; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_and_call() #0 {
@@ -64,13 +66,24 @@ define void @callee_with_stack_and_call() #0 {
; Should be able to copy incoming stack pointer directly to inner
; call's stack pointer argument.
+; There is stack usage only because of the need to evict a VGPR for
+; spilling CSR SGPRs.
+
; GCN-LABEL: {{^}}callee_no_stack_with_call:
; GCN: s_waitcnt
-; GCN-NOT: s32
+; GCN: s_mov_b32 s5, s32
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
+; GCN-DAG: v_writelane_b32 v32, s33, 0
+; GCN-DAG: v_writelane_b32 v32, s34, 1
; GCN: s_mov_b32 s33, s5
; GCN: s_swappc_b64
; GCN: s_mov_b32 s5, s33
-; GCN-NOT: s32
+
+; GCN-DAG: v_readlane_b32 s34, v32, 1
+; GCN-DAG: v_readlane_b32 s33, v32, 0
+; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
+; GCN: s_sub_u32 s32, s32, 0x200
+
; GCN: s_setpc_b64
define void @callee_no_stack_with_call() #0 {
call void @external_void_func_void()
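
For context on the callee_no_stack_with_call checks above, a minimal sketch of the pattern they verify. The RUN line is an assumption (the file's actual RUN lines fall outside the hunks shown), and @ext_sketch is a hypothetical callee added only for illustration:

; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

declare void @ext_sketch() #0

; Even with no allocas, the call forces the CSR SGPRs (s33, s34) into lanes of
; a reserved VGPR via v_writelane_b32, and that VGPR is itself spilled to
; scratch, so the "no stack" function now adjusts s32 and uses a scratch slot.
; GCN-LABEL: {{^}}no_stack_with_call_sketch:
; GCN: buffer_store_dword v32, off, s[0:3], s5
; GCN: v_writelane_b32 v32, s33,
; GCN: s_swappc_b64
; GCN: v_readlane_b32 s33, v32,
; GCN: buffer_load_dword v32, off, s[0:3], s5
define void @no_stack_with_call_sketch() #0 {
  call void @ext_sketch()
  ret void
}

attributes #0 = { nounwind }
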
diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
index 2d8d666a26b..f8ce8186e45 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
@@ -9,9 +9,21 @@ declare void @external_void_func_i32(i32) #0
; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm:
; GCN: s_waitcnt
-; GCN-NOT: s32
+; GCN: s_mov_b32 s5, s32
+; Spill CSR VGPR used for SGPR spilling
+; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
+; GCN-DAG: s_add_u32 s32, s32, 0x200
+; GCN-DAG: v_writelane_b32 v32, s33, 0
+; GCN-DAG: v_writelane_b32 v32, s34, 1
+; GCN-DAG: v_writelane_b32 v32, s35, 2
+
; GCN: s_swappc_b64
-; GCN-NOT: s32
+
+; GCN: v_readlane_b32 s35, v32, 2
+; GCN: v_readlane_b32 s34, v32, 1
+; GCN: v_readlane_b32 s33, v32, 0
+; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
+; GCN: s_sub_u32 s32, s32, 0x200
; GCN: s_setpc_b64
define void @test_func_call_external_void_func_i32_imm() #0 {
call void @external_void_func_i32(i32 42)
@@ -21,10 +33,10 @@ define void @test_func_call_external_void_func_i32_imm() #0 {
; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm_stack_use:
; GCN: s_waitcnt
; GCN: s_mov_b32 s5, s32
-; GCN: s_add_u32 s32, s32, 0x1100{{$}}
+; GCN: s_add_u32 s32, s32, 0x1200{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset
; GCN: s_swappc_b64
-; GCN: s_sub_u32 s32, s32, 0x1100{{$}}
+; GCN: s_sub_u32 s32, s32, 0x1200{{$}}
; GCN: s_setpc_b64
define void @test_func_call_external_void_func_i32_imm_stack_use() #0 {
%alloca = alloca [16 x i32], align 4
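
A short worked note on the updated constants, assuming the convention visible in these tests that s32 is adjusted in wave-scaled bytes (per-lane bytes times the 64-lane wave) while ScratchSize and workitem_private_segment_byte_size are per-lane bytes; the extra 4-byte slot is the scratch slot for the spilled CSR VGPR (v32):

  per-lane scratch grows by one 4-byte slot:
      2120 + 4 = 2124   (indirect_use_stack, indirect_2_level_use_stack)
      2052 + 4 = 2056   (direct_recursion_use_stack, usage_direct_recursion)
  the same 4 bytes, wave-scaled:
      4 * 64 = 256 = 0x100, matching 0x1100 -> 0x1200 in
      test_func_call_external_void_func_i32_imm_stack_use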