summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/AMDGPU/kernel-args.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/kernel-args.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/kernel-args.ll 130
1 files changed, 48 insertions, 82 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index 231582513a3..da8c994c530 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -210,8 +210,10 @@ entry:
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
-; GCN: s_load_dword s
-; GCN-NOT: {{buffer|flat|global}}_load_
+; SI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+
+; VI-MESA: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-HSA: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x8
define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
@@ -226,8 +228,7 @@ entry:
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
-; SI: s_load_dword s
-; SI: s_load_dword s
+; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
; VI-HSA: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x8
; VI-MESA: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
@@ -236,6 +237,7 @@ entry:
store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
ret void
}
+
; FUNC-LABEL: {{^}}v3i32_arg:
; HSA-VI: kernarg_segment_byte_size = 32
; HSA-VI: kernarg_segment_alignment = 4
@@ -274,8 +276,8 @@ entry:
; EG: VTX_READ_8
; EG: VTX_READ_8
-; GCN: s_load_dword s
-; GCN-NOT: {{buffer|flat|global}}_load_
+; GCN-DAG: s_load_dwordx2 s
+; GCN-DAG: s_load_dword s
define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
store <4 x i8> %in, <4 x i8> addrspace(1)* %out
@@ -290,12 +292,18 @@ entry:
; EG: VTX_READ_16
; EG: VTX_READ_16
-; SI-DAG: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0xb
; SI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x9
-; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x2c
-; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
+; MESA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x24
+; MESA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x2c
+
+
+; MESA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x24
+; MESA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x2c
+
+; HSA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x0
+; HSA-VI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x8
define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
store <4 x i16> %in, <4 x i16> addrspace(1)* %out
@@ -348,23 +356,16 @@ entry:
; EG: VTX_READ_8
; EG: VTX_READ_8
-
-; SI: s_load_dword s
-; SI: s_load_dword s
+; SI-NOT: {{buffer|flat|global}}_load
; SI: s_load_dwordx2 s
+; SI-NEXT: s_load_dwordx2 s
; SI-NOT: {{buffer|flat|global}}_load
-; VI: s_load_dword s
-; VI: s_load_dword s
-
-; VI: v_lshlrev_b16
-; VI: v_or_b32_e32
-; VI: v_or_b32_sdwa
-; VI: v_or_b32_sdwa
-; VI: v_lshlrev_b16
-; VI: s_lshr_b32
-; VI: v_or_b32_sdwa
-; VI: v_or_b32_sdwa
+; VI: s_load_dwordx2 s
+; VI-NEXT: s_load_dwordx2 s
+; VI-NOT: lshl
+; VI-NOT: _or
+; VI-NOT: _sdwa
define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
store <8 x i8> %in, <8 x i8> addrspace(1)* %out
@@ -383,19 +384,14 @@ entry:
; EG: VTX_READ_16
; EG: VTX_READ_16
-; SI: s_load_dword s
-; SI: s_load_dword s
-; SI: s_load_dword s
-; SI: s_load_dword s
-; SI: s_load_dwordx2
+; SI: s_load_dwordx4
+; SI-NEXT: s_load_dwordx2
; SI-NOT: {{buffer|flat|global}}_load
-; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x34
-; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x3c
+; MESA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x34
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x10
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x18
+; HSA-VI: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x10
define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
store <8 x i16> %in, <8 x i16> addrspace(1)* %out
@@ -413,6 +409,7 @@ entry:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
+
; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
; HSA-VI: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
@@ -462,33 +459,16 @@ entry:
; EG: VTX_READ_8
; EG: VTX_READ_8
-; SI: s_load_dword s
-; SI: s_load_dword s
-; SI: s_load_dword s
-; SI: s_load_dword s
-; SI: s_load_dwordx2
+; SI: s_load_dwordx4 s
+; SI-NEXT: s_load_dwordx2 s
; SI-NOT: {{buffer|flat|global}}_load
-; VI: s_load_dword s
-; VI: s_load_dword s
-; VI: s_load_dword s
-; VI: s_load_dword s
-
-; VI: s_lshr_b32
-; VI: v_lshlrev_b16
-; VI: s_lshr_b32
-; VI: s_lshr_b32
-; VI: v_or_b32_sdwa
-; VI: v_or_b32_sdwa
-; VI: v_lshlrev_b16
-; VI: v_lshlrev_b16
-; VI: v_or_b32_sdwa
-; VI: v_or_b32_sdwa
-; VI: v_lshlrev_b16
-; VI: v_lshlrev_b16
-; VI: v_or_b32_sdwa
-; VI: v_or_b32_sdwa
+; VI: s_load_dwordx4 s
+; VI-NOT: shr
+; VI-NOT: shl
+; VI-NOT: _sdwa
+; VI-NOT: _or_
define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
store <16 x i8> %in, <16 x i8> addrspace(1)* %out
@@ -516,27 +496,14 @@ entry:
; EG: VTX_READ_16
; EG: VTX_READ_16
-; SI: s_load_dword s
-; SI: s_load_dword s
-; SI: s_load_dword s
-; SI: s_load_dword s
-; SI: s_load_dword s
-; SI: s_load_dword s
-; SI: s_load_dword s
-; SI: s_load_dword s
-
+; SI: s_load_dwordx8 s
+; SI-NEXT: s_load_dwordx2 s
; SI-NOT: {{buffer|flat|global}}_load
-; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
-; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x4c
-; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x54
-; MESA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x5c
+; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x28
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x30
-; HSA-VI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x38
+; HSA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[4:5], 0x20
define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
store <16 x i16> %in, <16 x i16> addrspace(1)* %out
@@ -600,22 +567,21 @@ entry:
}
; FUNC-LABEL: {{^}}kernel_arg_i64:
-; MESA-GCN: s_load_dwordx2
-; MESA-GCN: s_load_dwordx2
+; MESA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x24
+; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
+
; MESA-GCN: buffer_store_dwordx2
-; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
store i64 %a, i64 addrspace(1)* %out, align 8
ret void
}
; FUNC-LABEL: {{^}}f64_kernel_arg:
-; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
-; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
-; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
-; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c
+; SI-DAG: s_load_dwordx4 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
+; MESA-VI-DAG: s_load_dwordx4 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
; MESA-GCN: buffer_store_dwordx2
-; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
+
+; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0
define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
entry:
store double %in, double addrspace(1)* %out
OpenPOWER on IntegriCloud