diff options
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll | 28 |
1 file changed, 17 insertions, 11 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index 2df3ad9cdef..d5211ceede3 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -543,19 +543,25 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
   ret void
 }
 
+; Only one stack load should be emitted for all 3 values.
 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
-; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}}
-; GCN: v_and_b32_e32 v32, 0x3ff, v32
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
-; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}}
-; GCN: v_bfe_u32 v32, v32, 10, 10
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
+; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; GCN-NOT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
 ; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}}
-; GCN: v_bfe_u32 v32, v32, 20, 10
-; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
-
-; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN-NOT: buffer_load_dword
+
+; GCN: v_and_b32_e32 [[AND_X:v[0-9]+]], 0x3ff, v32
+; GCN-NOT: buffer_load_dword
+; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[AND_X]]
+; GCN-NOT: buffer_load_dword
+; GCN: v_bfe_u32 [[BFE_Y:v[0-9]+]], v32, 10, 10
+; GCN-NEXT: v_bfe_u32 [[BFE_Z:v[0-9]+]], v32, 20, 10
+; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Y]]
+; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[BFE_Z]]
+
+; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; GCN-NEXT: s_waitcnt
 ; GCN-NEXT: s_setpc_b64
 define void @too_many_args_use_workitem_id_xyz(

