summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/kernel-args.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/kernel-args.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/kernel-args.ll116
1 files changed, 116 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index 6b6473124bb..a657b8d01af 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -375,6 +375,122 @@ entry:
ret void
}
+; FUNC-LABEL: {{^}}v5i8_arg:
+; HSA-GFX9: kernarg_segment_byte_size = 16
+; HSA-GFX9: kernarg_segment_alignment = 4
+
+; EGCM-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
+; EGCM-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
+; EGCM-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
+
+; SI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+
+; VI-MESA: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; VI-HSA: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x8
+define amdgpu_kernel void @v5i8_arg(<5 x i8> addrspace(1)* nocapture %out, <5 x i8> %in) nounwind {
+entry:
+ store <5 x i8> %in, <5 x i8> addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v5i16_arg:
+; HSA-GFX9: kernarg_segment_byte_size = 32
+; HSA-GFX9: kernarg_segment_alignment = 4
+
+; EGCM-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 58
+; EGCM-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 58
+; EGCM-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 58
+
+; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+
+; VI-HSA: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x8
+; VI-MESA: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+define amdgpu_kernel void @v5i16_arg(<5 x i16> addrspace(1)* nocapture %out, <5 x i16> %in) nounwind {
+entry:
+ store <5 x i16> %in, <5 x i16> addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v5i32_arg:
+; HSA-GFX9: kernarg_segment_byte_size = 64
+; HSA-GFX9: kernarg_segment_alignment = 5
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
+; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
+; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
+; HSA-GFX9: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
+define amdgpu_kernel void @v5i32_arg(<5 x i32> addrspace(1)* nocapture %out, <5 x i32> %in) nounwind {
+entry:
+ store <5 x i32> %in, <5 x i32> addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v5f32_arg:
+; HSA-GFX9: kernarg_segment_byte_size = 64
+; HSA-GFX9: kernarg_segment_alignment = 5
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
+; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
+; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
+; HSA-GFX9: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
+define amdgpu_kernel void @v5f32_arg(<5 x float> addrspace(1)* nocapture %out, <5 x float> %in) nounwind {
+entry:
+ store <5 x float> %in, <5 x float> addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v5i64_arg:
+; HSA-GFX9: kernarg_segment_byte_size = 128
+; HSA-GFX9: kernarg_segment_alignment = 6
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
+; SI-DAG: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
+; SI-DAG: s_load_dwordx2 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x21
+; MESA-VI-DAG: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
+; MESA-VI-DAG: s_load_dwordx2 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x84
+; HSA-GFX9-DAG: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
+; HSA-GFX9-DAG: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x60
+define amdgpu_kernel void @v5i64_arg(<5 x i64> addrspace(1)* nocapture %out, <5 x i64> %in) nounwind {
+entry:
+ store <5 x i64> %in, <5 x i64> addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v5f64_arg:
+; HSA-GFX9: kernarg_segment_byte_size = 128
+; HSA-GFX9: kernarg_segment_alignment = 6
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
+; EGCM-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
+; SI-DAG: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
+; SI-DAG: s_load_dwordx2 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x21
+; MESA-VI-DAG: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
+; MESA-VI-DAG: s_load_dwordx2 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x84
+; HSA-GFX9-DAG: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
+; HSA-GFX9-DAG: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x60
+define amdgpu_kernel void @v5f64_arg(<5 x double> addrspace(1)* nocapture %out, <5 x double> %in) nounwind {
+entry:
+ store <5 x double> %in, <5 x double> addrspace(1)* %out, align 8
+ ret void
+}
+
; FIXME: Lots of unpack and re-pack junk on VI
; FUNC-LABEL: {{^}}v8i8_arg:
; HSA-GFX9: kernarg_segment_byte_size = 16
OpenPOWER on IntegriCloud