diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll | 178 |
1 files changed, 128 insertions, 50 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll index dbab269350a..09bb3b40d4b 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll @@ -1,8 +1,9 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-code-object-v3 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -mattr=-code-object-v3 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s ; GCN-LABEL: {{^}}use_workitem_id_x: ; GCN: s_waitcnt -; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, v0 +; GCN: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v0 +; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]] ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @use_workitem_id_x() #1 { @@ -13,7 +14,8 @@ define void @use_workitem_id_x() #1 { ; GCN-LABEL: {{^}}use_workitem_id_y: ; GCN: s_waitcnt -; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, v0 +; GCN: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10 +; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]] ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @use_workitem_id_y() #1 { @@ -24,7 +26,8 @@ define void @use_workitem_id_y() #1 { ; GCN-LABEL: {{^}}use_workitem_id_z: ; GCN: s_waitcnt -; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, v0 +; GCN: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10 +; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]] ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @use_workitem_id_z() #1 { @@ -35,8 +38,10 @@ define void @use_workitem_id_z() #1 { ; GCN-LABEL: {{^}}use_workitem_id_xy: ; GCN: s_waitcnt -; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, v0 -; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, v1 +; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0 +; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10 +; GCN-DAG: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]] +; GCN-DAG: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]] ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @use_workitem_id_xy() #1 { @@ -49,9 +54,12 @@ define void @use_workitem_id_xy() #1 { ; GCN-LABEL: {{^}}use_workitem_id_xyz: ; GCN: s_waitcnt -; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, v0 -; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, v1 -; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, v2 +; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0 +; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10 +; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10 +; GCN-DAG: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]] +; GCN-DAG: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]] +; GCN-DAG: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]] ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @use_workitem_id_xyz() #1 { @@ -66,8 +74,10 @@ define void @use_workitem_id_xyz() #1 { ; GCN-LABEL: {{^}}use_workitem_id_xz: ; GCN: s_waitcnt -; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, v0 -; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, v1 +; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0 +; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10 +; GCN-DAG: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]] +; GCN-DAG: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]] ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @use_workitem_id_xz() #1 { @@ -80,8 +90,10 @@ define void @use_workitem_id_xz() #1 { ; GCN-LABEL: {{^}}use_workitem_id_yz: ; GCN: s_waitcnt -; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, v0 -; GCN-NEXT: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, v1 +; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10 +; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10 +; GCN-DAG: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]] +; GCN-DAG: flat_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]] ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @use_workitem_id_yz() #1 { @@ -108,7 +120,7 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 { ; GCN-NOT: v0 ; GCN-NOT: v1 -; GCN: v_mov_b32_e32 v0, v1 +; GCN: v_lshlrev_b32_e32 v0, 10, v1 ; GCN-NOT: v0 ; GCN-NOT: v1 ; GCN: s_swappc_b64 @@ -122,15 +134,72 @@ define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 { ; GCN-NOT: v0 ; GCN-NOT: v2 -; GCN: v_mov_b32_e32 v0, v2 +; GCN: v_lshlrev_b32_e32 v0, 20, v2 ; GCN-NOT: v0 -; GCN-NOT: v2 +; GCN-NOT: v1 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 { call void @use_workitem_id_z() ret void } +; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy: +; GCN-NOT: v0 +; GCN-NOT: v1 +; GCN: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1 +; GCN: v_or_b32_e32 v0, v0, [[IDY]] +; GCN-NOT: v0 +; GCN-NOT: v1 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 { + call void @use_workitem_id_xy() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz: +; GCN-NOT: v0 +; GCN-NOT: v2 +; GCN: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2 +; GCN: v_or_b32_e32 v0, v0, [[IDZ]] +; GCN-NOT: v0 +; GCN-NOT: v2 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 { + call void @use_workitem_id_xz() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz: +; GCN-NOT: v1 +; GCN-NOT: v2 +; GCN-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1 +; GCN-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2 +; GCN: v_or_b32_e32 v0, [[IDY]], [[IDZ]] +; GCN-NOT: v1 +; GCN-NOT: v2 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 { + call void @use_workitem_id_yz() + ret void +} + +; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz: +; GCN-NOT: v0 +; GCN-NOT: v1 +; GCN-NOT: v2 +; GCN-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1 +; GCN-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2 +; GCN-DAG: v_or_b32_e32 v0, v0, [[IDY]] +; GCN-DAG: v_or_b32_e32 v0, v0, [[IDZ]] +; GCN-NOT: v0 +; GCN-NOT: v1 +; GCN-NOT: v2 +; GCN: s_swappc_b64 +define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 { + call void @use_workitem_id_xyz() + ret void +} + ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x: ; GCN-NOT: v0 ; GCN: s_swappc_b64 @@ -160,8 +229,9 @@ define void @func_indirect_use_workitem_id_z() #1 { ; GCN-LABEL: {{^}}other_arg_use_workitem_id_x: ; GCN: s_waitcnt -; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 -; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1 +; GCN-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v1 +; GCN-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 +; GCN-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]] define void @other_arg_use_workitem_id_x(i32 %arg0) #1 { %val = call i32 @llvm.amdgcn.workitem.id.x() store volatile i32 %arg0, i32 addrspace(1)* undef @@ -171,8 +241,9 @@ define void @other_arg_use_workitem_id_x(i32 %arg0) #1 { ; GCN-LABEL: {{^}}other_arg_use_workitem_id_y: ; GCN: s_waitcnt -; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 -; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1 +; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 10, 10 +; GCN-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 +; GCN-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]] define void @other_arg_use_workitem_id_y(i32 %arg0) #1 { %val = call i32 @llvm.amdgcn.workitem.id.y() store volatile i32 %arg0, i32 addrspace(1)* undef @@ -182,8 +253,9 @@ define void @other_arg_use_workitem_id_y(i32 %arg0) #1 { ; GCN-LABEL: {{^}}other_arg_use_workitem_id_z: ; GCN: s_waitcnt -; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 -; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1 +; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 20, 10 +; GCN-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0 +; GCN-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]] define void @other_arg_use_workitem_id_z(i32 %arg0) #1 { %val = call i32 @llvm.amdgcn.workitem.id.z() store volatile i32 %arg0, i32 addrspace(1)* undef @@ -207,6 +279,7 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 { ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y: ; GCN: enable_vgpr_workitem_id = 1 +; GCN: v_lshlrev_b32_e32 v1, 10, v1 ; GCN-NOT: v1 ; GCN: v_mov_b32_e32 v0, 0x22b ; GCN-NOT: v1 @@ -221,7 +294,7 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 { ; GCN: enable_vgpr_workitem_id = 2 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b -; GCN-DAG: v_mov_b32_e32 v1, v2 +; GCN-DAG: v_lshlrev_b32_e32 v1, 20, v2 ; GCN: s_swappc_b64 ; GCN-NOT: v0 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 { @@ -232,6 +305,7 @@ define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 { ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x: ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}} +; GCN: v_and_b32_e32 v32, 0x3ff, v32 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32 ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -357,6 +431,7 @@ define void @too_many_args_call_too_many_args_use_workitem_id_x( ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; GCN-NEXT: s_waitcnt +; GCN-NEXT: v_and_b32_e32 v32, 0x3ff, v32 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32 ; GCN: buffer_load_dword v0, off, s[0:3], s32{{$}} ; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload @@ -469,15 +544,18 @@ define void @func_call_too_many_args_use_workitem_id_x_byval() #1 { } ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz: -; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}} +; GCN: v_and_b32_e32 v32, 0x3ff, v32 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32 -; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4{{$}} +; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}} +; GCN: v_bfe_u32 v32, v32, 10, 10 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32 -; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:8{{$}} +; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}} +; GCN: v_bfe_u32 v32, v32, 20, 10 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32 -; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_waitcnt ; GCN-NEXT: s_setpc_b64 define void @too_many_args_use_workitem_id_xyz( @@ -531,19 +609,19 @@ define void @too_many_args_use_workitem_id_xyz( ret void } -; frame[0] = ID X -; frame[1] = ID Y -; frame[2] = ID Z +; frame[0] = ID { Z, Y, X } ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz: ; GCN: enable_vgpr_workitem_id = 2 -; GCN: s_mov_b32 s33, s7 -; GCN: s_mov_b32 s32, s33 +; GCN-DAG: s_mov_b32 s33, s7 +; GCN-DAG: s_mov_b32 s32, s33 -; GCN-DAG: buffer_store_dword v0, off, s[0:3], s32{{$}} -; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:4 -; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:8 +; GCN-DAG: v_lshlrev_b32_e32 v1, 10, v1 +; GCN-DAG: v_or_b32_e32 v0, v0, v1 +; GCN-DAG: v_lshlrev_b32_e32 v2, 20, v2 +; GCN-DAG: v_or_b32_e32 v0, v0, v2 +; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}} ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 { call void @too_many_args_use_workitem_id_xyz( @@ -560,19 +638,19 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 { ; workitem ID X in register, yz on stack ; v31 = workitem ID X -; frame[0] = workitem Y -; frame[1] = workitem Z +; frame[0] = workitem { Z, Y, X } ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz: -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v31 -; GCN: buffer_load_dword v31, off, s[0:3], s32{{$}} -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v31 -; GCN: buffer_load_dword v31, off, s[0:3], s32 offset:4{{$}} -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v31 +; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31 +; GCN-DAG: flat_store_dword v[0:1], [[IDX]] +; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10 +; GCN-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDY]] +; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10 +; GCN-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDZ]] ; GCN: s_waitcnt ; GCN-NEXT: s_setpc_b64 -; GCN: ScratchSize: 12 +; GCN: ScratchSize: 8 define void @too_many_args_use_workitem_id_x_stack_yz( i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15, @@ -623,18 +701,18 @@ define void @too_many_args_use_workitem_id_x_stack_yz( ret void } -; frame[0] = ID Y -; frame[1] = ID Z - ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz: ; GCN: enable_vgpr_workitem_id = 2 ; GCN: s_mov_b32 s33, s7 -; GCN: s_mov_b32 s32, s33 -; GCN-DAG: v_mov_b32_e32 v31, v0 -; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32{{$}} -; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:4 +; GCN-NOT: v0 +; GCN-DAG: v_lshlrev_b32_e32 v1, 10, v1 +; GCN-DAG: v_or_b32_e32 v0, v0, v1 +; GCN-DAG: v_lshlrev_b32_e32 v2, 20, v2 +; GCN-DAG: v_or_b32_e32 v31, v0, v2 + +; GCN: s_mov_b32 s32, s33 ; GCN: s_swappc_b64 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 { call void @too_many_args_use_workitem_id_x_stack_yz( |

