diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/kernel-args.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/kernel-args.ll | 130 |
1 files changed, 48 insertions, 82 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll index 231582513a3..da8c994c530 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll @@ -210,8 +210,10 @@ entry: ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42 -; GCN: s_load_dword s -; GCN-NOT: {{buffer|flat|global}}_load_ +; SI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb + +; VI-MESA: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; VI-HSA: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x8 define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { entry: store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 @@ -226,8 +228,7 @@ entry: ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46 ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48 -; SI: s_load_dword s -; SI: s_load_dword s +; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb ; VI-HSA: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x8 ; VI-MESA: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c @@ -236,6 +237,7 @@ entry: store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 ret void } + ; FUNC-LABEL: {{^}}v3i32_arg: ; HSA-VI: kernarg_segment_byte_size = 32 ; HSA-VI: kernarg_segment_alignment = 4 @@ -274,8 +276,8 @@ entry: ; EG: VTX_READ_8 ; EG: VTX_READ_8 -; GCN: s_load_dword s -; GCN-NOT: {{buffer|flat|global}}_load_ +; GCN-DAG: s_load_dwordx2 s +; GCN-DAG: s_load_dword s define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { entry: store <4 x i8> %in, <4 x i8> addrspace(1)* %out @@ -290,12 +292,18 @@ entry: ; EG: VTX_READ_16 ; EG: VTX_READ_16 -; SI-DAG: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb -; SI-DAG: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xc +; SI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0xb ; SI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x9 -; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x2c -; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8 +; MESA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x24 +; MESA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x2c + + +; MESA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x24 +; MESA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x2c + +; HSA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0 +; HSA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8 define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { entry: store <4 x i16> %in, <4 x i16> addrspace(1)* %out @@ -348,23 +356,16 @@ entry: ; EG: VTX_READ_8 ; EG: VTX_READ_8 - -; SI: s_load_dword s -; SI: s_load_dword s +; SI-NOT: {{buffer|flat|global}}_load ; SI: s_load_dwordx2 s +; SI-NEXT: s_load_dwordx2 s ; SI-NOT: {{buffer|flat|global}}_load -; VI: s_load_dword s -; VI: s_load_dword s - -; VI: v_lshlrev_b16 -; VI: v_or_b32_e32 -; VI: v_or_b32_sdwa -; VI: v_or_b32_sdwa -; VI: v_lshlrev_b16 -; VI: s_lshr_b32 -; VI: v_or_b32_sdwa -; VI: v_or_b32_sdwa +; VI: s_load_dwordx2 s +; VI-NEXT: s_load_dwordx2 s +; VI-NOT: lshl +; VI-NOT: _or +; VI-NOT: _sdwa define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { entry: store <8 x i8> %in, <8 x i8> addrspace(1)* %out @@ -383,19 +384,14 @@ entry: ; EG: VTX_READ_16 ; EG: VTX_READ_16 -; SI: s_load_dword s -; SI: s_load_dword s -; SI: s_load_dword s -; SI: s_load_dword s -; SI: s_load_dwordx2 +; SI: s_load_dwordx4 +; SI-NEXT: s_load_dwordx2 ; SI-NOT: {{buffer|flat|global}}_load -; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x34 -; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x3c +; MESA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x34 -; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x10 -; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x18 +; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x10 define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { entry: store <8 x i16> %in, <8 x i16> addrspace(1)* %out @@ -413,6 +409,7 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X + ; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11 ; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44 ; HSA-VI: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20 @@ -462,33 +459,16 @@ entry: ; EG: VTX_READ_8 ; EG: VTX_READ_8 -; SI: s_load_dword s -; SI: s_load_dword s -; SI: s_load_dword s -; SI: s_load_dword s -; SI: s_load_dwordx2 +; SI: s_load_dwordx4 s +; SI-NEXT: s_load_dwordx2 s ; SI-NOT: {{buffer|flat|global}}_load -; VI: s_load_dword s -; VI: s_load_dword s -; VI: s_load_dword s -; VI: s_load_dword s - -; VI: s_lshr_b32 -; VI: v_lshlrev_b16 -; VI: s_lshr_b32 -; VI: s_lshr_b32 -; VI: v_or_b32_sdwa -; VI: v_or_b32_sdwa -; VI: v_lshlrev_b16 -; VI: v_lshlrev_b16 -; VI: v_or_b32_sdwa -; VI: v_or_b32_sdwa -; VI: v_lshlrev_b16 -; VI: v_lshlrev_b16 -; VI: v_or_b32_sdwa -; VI: v_or_b32_sdwa +; VI: s_load_dwordx4 s +; VI-NOT: shr +; VI-NOT: shl +; VI-NOT: _sdwa +; VI-NOT: _or_ define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { entry: store <16 x i8> %in, <16 x i8> addrspace(1)* %out @@ -516,27 +496,14 @@ entry: ; EG: VTX_READ_16 ; EG: VTX_READ_16 -; SI: s_load_dword s -; SI: s_load_dword s -; SI: s_load_dword s -; SI: s_load_dword s -; SI: s_load_dword s -; SI: s_load_dword s -; SI: s_load_dword s -; SI: s_load_dword s - +; SI: s_load_dwordx8 s +; SI-NEXT: s_load_dwordx2 s ; SI-NOT: {{buffer|flat|global}}_load -; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44 -; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x4c -; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x54 -; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x5c +; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44 -; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20 -; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x28 -; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x30 -; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x38 +; HSA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20 define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { entry: store <16 x i16> %in, <16 x i16> addrspace(1)* %out @@ -600,22 +567,21 @@ entry: } ; FUNC-LABEL: {{^}}kernel_arg_i64: -; MESA-GCN: s_load_dwordx2 -; MESA-GCN: s_load_dwordx2 +; MESA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x24 +; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0 + ; MESA-GCN: buffer_store_dwordx2 -; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8 define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { store i64 %a, i64 addrspace(1)* %out, align 8 ret void } ; FUNC-LABEL: {{^}}f64_kernel_arg: -; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9 -; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb -; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24 -; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c +; SI-DAG: s_load_dwordx4 s[{{[0-9]:[0-9]}}], s[0:1], 0x9 +; MESA-VI-DAG: s_load_dwordx4 s[{{[0-9]:[0-9]}}], s[0:1], 0x24 ; MESA-GCN: buffer_store_dwordx2 -; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8 + +; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0 define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in) { entry: store double %in, double addrspace(1)* %out |