diff options
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll | 23 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/nested-calls.ll | 20 |
4 files changed, 45 insertions, 18 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll index 44648dfc3c2..6ae5aabb5ca 100644 --- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -30,10 +30,11 @@ entry: ; GCN-LABEL: {{^}}void_func_byval_struct_non_leaf: ; GCN: s_mov_b32 s5, s32 -; GCN: buffer_store_dword v32 +; GCN-DAG: buffer_store_dword v32 +; GCN-DAG: buffer_store_dword v33 ; GCN: v_writelane_b32 -; GCN-DAG: s_add_u32 s32, s32, 0xa00{{$}} +; GCN-DAG: s_add_u32 s32, s32, 0xb00{{$}} ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}} ; GCN: v_add_i32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]] @@ -48,7 +49,8 @@ entry: ; GCN: v_readlane_b32 ; GCN: buffer_load_dword v32, -; GCN: s_sub_u32 s32, s32, 0xa00{{$}} +; GCN: buffer_load_dword v33, +; GCN: s_sub_u32 s32, s32, 0xb00{{$}} ; GCN: s_setpc_b64 define void @void_func_byval_struct_non_leaf(%struct.ByValStruct* byval noalias nocapture align 4 %arg0, %struct.ByValStruct* byval noalias nocapture align 4 %arg1) #1 { entry: @@ -67,7 +69,7 @@ entry: ; GCN-LABEL: {{^}}call_void_func_byval_struct_func: ; GCN: s_mov_b32 s5, s32 -; GCN: s_add_u32 s32, s32, 0xa00{{$}} +; GCN: s_add_u32 s32, s32, 0xc00{{$}} ; GCN: v_writelane_b32 ; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}} @@ -103,7 +105,7 @@ entry: ; GCN: v_readlane_b32 -; GCN: s_sub_u32 s32, s32, 0xa00{{$}} +; GCN: s_sub_u32 s32, s32, 0xc00{{$}} ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @call_void_func_byval_struct_func() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll index 60616639ea8..7c39831e73d 100644 --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -146,7 +146,7 @@ define void @use_stack1() #1 { } ; GCN-LABEL: {{^}}indirect_use_stack: -; GCN: ScratchSize: 2120 +; GCN: ScratchSize: 2124 define void @indirect_use_stack() #1 { %alloca = alloca [16 x i32], align 4 call void asm sideeffect "; use $0", "v"([16 x i32]* %alloca) #0 @@ -156,7 +156,7 @@ define void @indirect_use_stack() #1 { ; GCN-LABEL: {{^}}indirect_2_level_use_stack: ; GCN: is_dynamic_callstack = 0 -; GCN: ScratchSize: 2120 +; GCN: ScratchSize: 2124 define amdgpu_kernel void @indirect_2_level_use_stack() #0 { call void @indirect_use_stack() ret void @@ -199,7 +199,7 @@ define amdgpu_kernel void @usage_external_recurse() #0 { } ; GCN-LABEL: {{^}}direct_recursion_use_stack: -; GCN: ScratchSize: 2052 +; GCN: ScratchSize: 2056 define void @direct_recursion_use_stack(i32 %val) #2 { %alloca = alloca [512 x i32], align 4 call void asm sideeffect "; use $0", "v"([512 x i32]* %alloca) #0 @@ -218,7 +218,7 @@ ret: ; GCN-LABEL: {{^}}usage_direct_recursion: ; GCN: is_ptr64 = 1 ; GCN: is_dynamic_callstack = 1 -; GCN: workitem_private_segment_byte_size = 2052 +; GCN: workitem_private_segment_byte_size = 2056 define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 { call void @direct_recursion_use_stack(i32 %n) ret void diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index a07199c1a09..d0edcf8fcbb 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -36,14 +36,15 @@ define void @callee_with_stack() #0 { ; GCN-LABEL: {{^}}callee_with_stack_and_call: ; GCN: ; BB#0: ; GCN-NEXT: s_waitcnt +; GCN: s_mov_b32 s5, s32 +; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 -; GCN-DAG: s_mov_b32 s5, s32 ; GCN-DAG: v_writelane_b32 v32, s33, ; GCN-DAG: v_writelane_b32 v32, s34, ; GCN-DAG: v_writelane_b32 v32, s35, -; GCN-DAG: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}} -; GCN-DAG: s_add_u32 s32, s32, 0x200{{$}} +; GCN-DAG: s_add_u32 s32, s32, 0x300{{$}} ; GCN-DAG: v_mov_b32_e32 v0, 0{{$}} +; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}} ; GCN-DAG: s_mov_b32 s33, s5 @@ -52,6 +53,7 @@ define void @callee_with_stack() #0 { ; GCN-DAG: v_readlane_b32 s35, ; GCN-DAG: v_readlane_b32 s34, ; GCN-DAG: v_readlane_b32 s33, +; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; GCN: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @callee_with_stack_and_call() #0 { @@ -64,13 +66,24 @@ define void @callee_with_stack_and_call() #0 { ; Should be able to copy incoming stack pointer directly to inner ; call's stack pointer argument. +; There is stack usage only because of the need to evict a VGPR for +; spilling CSR SGPRs. + ; GCN-LABEL: {{^}}callee_no_stack_with_call: ; GCN: s_waitcnt -; GCN-NOT: s32 +; GCN: s_mov_b32 s5, s32 +; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 +; GCN-DAG: v_writelane_b32 v32, s33, 0 +; GCN-DAG: v_writelane_b32 v32, s34, 1 ; GCN: s_mov_b32 s33, s5 ; GCN: s_swappc_b64 ; GCN: s_mov_b32 s5, s33 -; GCN-NOT: s32 + +; GCN-DAG: v_readlane_b32 s34, v32, 1 +; GCN-DAG: v_readlane_b32 s33, v32, 0 +; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 +; GCN: s_sub_u32 s32, s32, 0x200 + ; GCN: s_setpc_b64 define void @callee_no_stack_with_call() #0 { call void @external_void_func_void() diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll index 2d8d666a26b..f8ce8186e45 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -9,9 +9,21 @@ declare void @external_void_func_i32(i32) #0 ; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm: ; GCN: s_waitcnt -; GCN-NOT: s32 +; GCN: s_mov_b32 s5, s32 +; Spill CSR VGPR used for SGPR spilling +; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 +; GCN-DAG: s_add_u32 s32, s32, 0x200 +; GCN-DAG: v_writelane_b32 v32, s33, 0 +; GCN-DAG: v_writelane_b32 v32, s34, 1 +; GCN-DAG: v_writelane_b32 v32, s35, 2 + ; GCN: s_swappc_b64 -; GCN-NOT: s32 + +; GCN: v_readlane_b32 s35, v32, 2 +; GCN: v_readlane_b32 s34, v32, 1 +; GCN: v_readlane_b32 s33, v32, 0 +; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 +; GCN: s_sub_u32 s32, s32, 0x200 ; GCN: s_setpc_b64 define void @test_func_call_external_void_func_i32_imm() #0 { call void @external_void_func_i32(i32 42) @@ -21,10 +33,10 @@ define void @test_func_call_external_void_func_i32_imm() #0 { ; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm_stack_use: ; GCN: s_waitcnt ; GCN: s_mov_b32 s5, s32 -; GCN: s_add_u32 s32, s32, 0x1100{{$}} +; GCN: s_add_u32 s32, s32, 0x1200{{$}} ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset ; GCN: s_swappc_b64 -; GCN: s_sub_u32 s32, s32, 0x1100{{$}} +; GCN: s_sub_u32 s32, s32, 0x1200{{$}} ; GCN: s_setpc_b64 define void @test_func_call_external_void_func_i32_imm_stack_use() #0 { %alloca = alloca [16 x i32], align 4 |