diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/kernel-args.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/kernel-args.ll | 72 |
1 files changed, 36 insertions, 36 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll index 95a68319f8a..6fa26cb3879 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll @@ -17,7 +17,7 @@ ; FIXME: Should be using s_load_dword ; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]] -define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { +define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { entry: %0 = zext i8 %in to i32 store i32 %0, i32 addrspace(1)* %out, align 4 @@ -36,7 +36,7 @@ entry: ; FIXME: Should be using s_load_dword ; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]] -define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { +define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { entry: %0 = zext i8 %in to i32 store i32 %0, i32 addrspace(1)* %out, align 4 @@ -55,7 +55,7 @@ entry: ; FIXME: Should be using s_load_dword ; HSA-VI: flat_load_sbyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]] -define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { +define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { entry: %0 = sext i8 %in to i32 store i32 %0, i32 addrspace(1)* %out, align 4 @@ -75,7 +75,7 @@ entry: ; FIXME: Should be using s_load_dword ; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]] -define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { +define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { entry: %0 = zext i16 %in to i32 store i32 %0, i32 addrspace(1)* %out, align 4 @@ -94,7 +94,7 @@ entry: ; FIXME: Should be using s_load_dword ; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]] -define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { +define 
amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { entry: %0 = zext i16 %in to i32 store i32 %0, i32 addrspace(1)* %out, align 4 @@ -113,7 +113,7 @@ entry: ; FIXME: Should be using s_load_dword ; HSA-VI: flat_load_sshort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]] -define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { +define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { entry: %0 = sext i16 %in to i32 store i32 %0, i32 addrspace(1)* %out, align 4 @@ -126,7 +126,7 @@ entry: ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c ; HSA-VI: s_load_dword s{{[0-9]}}, s[4:5], 0x8 -define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { +define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { entry: store i32 %in, i32 addrspace(1)* %out, align 4 ret void @@ -138,7 +138,7 @@ entry: ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c ; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8 -define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind { +define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind { entry: store float %in, float addrspace(1)* %out, align 4 ret void @@ -152,7 +152,7 @@ entry: ; MESA-GCN: buffer_load_ubyte ; HSA-VI: flat_load_ubyte ; HSA-VI: flat_load_ubyte -define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { +define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { entry: store <2 x i8> %in, <2 x i8> addrspace(1)* %out ret void @@ -166,7 +166,7 @@ entry: ; MESA-GCN: buffer_load_ushort ; HSA-VI: flat_load_ushort ; HSA-VI: flat_load_ushort -define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) { +define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) { entry: store <2 x i16> 
%in, <2 x i16> addrspace(1)* %out ret void @@ -179,7 +179,7 @@ entry: ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb ; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c ; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8 -define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind { +define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind { entry: store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 ret void @@ -192,7 +192,7 @@ entry: ; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb ; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c ; HSA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[4:5], 0x8 -define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { +define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { entry: store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 ret void @@ -209,7 +209,7 @@ entry: ; HSA-VI: flat_load_ubyte ; HSA-VI: flat_load_ubyte ; HSA-VI: flat_load_ubyte -define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { +define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { entry: store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 ret void @@ -226,7 +226,7 @@ entry: ; HSA-VI: flat_load_ushort ; HSA-VI: flat_load_ushort ; HSA-VI: flat_load_ushort -define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind { +define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind { entry: store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 ret void @@ -239,7 +239,7 @@ entry: ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd ; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34 ; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10 -define void @v3i32_arg(<3 x i32> addrspace(1)* 
nocapture %out, <3 x i32> %in) nounwind { +define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { entry: store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 ret void @@ -253,7 +253,7 @@ entry: ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd ; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34 ; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10 -define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { +define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { entry: store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 ret void @@ -273,7 +273,7 @@ entry: ; HSA-VI: flat_load_ubyte ; HSA-VI: flat_load_ubyte ; HSA-VI: flat_load_ubyte -define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { +define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { entry: store <4 x i8> %in, <4 x i8> addrspace(1)* %out ret void @@ -293,7 +293,7 @@ entry: ; HSA-GCN: flat_load_ushort ; HSA-GCN: flat_load_ushort ; HSA-GCN: flat_load_ushort -define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { +define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { entry: store <4 x i16> %in, <4 x i16> addrspace(1)* %out ret void @@ -308,7 +308,7 @@ entry: ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd ; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34 ; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10 -define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { +define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { entry: store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 ret void @@ -323,7 +323,7 @@ entry: ; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd ; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34 ; HSA-VI: s_load_dwordx4 
s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10 -define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { +define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { entry: store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4 ret void @@ -354,7 +354,7 @@ entry: ; HSA-GCN: float_load_ubyte ; HSA-GCN: float_load_ubyte ; HSA-GCN: float_load_ubyte -define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { +define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { entry: store <8 x i8> %in, <8 x i8> addrspace(1)* %out ret void @@ -386,7 +386,7 @@ entry: ; HSA-VI: flat_load_ushort ; HSA-VI: flat_load_ushort ; HSA-VI: flat_load_ushort -define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { +define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { entry: store <8 x i16> %in, <8 x i16> addrspace(1)* %out ret void @@ -405,7 +405,7 @@ entry: ; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11 ; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44 ; HSA-VI: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20 -define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { +define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { entry: store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 ret void @@ -422,7 +422,7 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X ; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11 -define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { +define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { entry: store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 ret void @@ -478,7 +478,7 @@ entry: ; HSA-VI: flat_load_ubyte ; HSA-VI: flat_load_ubyte ; HSA-VI: flat_load_ubyte 
-define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { +define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { entry: store <16 x i8> %in, <16 x i8> addrspace(1)* %out ret void @@ -534,7 +534,7 @@ entry: ; HSA-VI: flat_load_ushort ; HSA-VI: flat_load_ushort ; HSA-VI: flat_load_ushort -define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { +define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { entry: store <16 x i16> %in, <16 x i16> addrspace(1)* %out ret void @@ -561,7 +561,7 @@ entry: ; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19 ; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64 ; HSA-VI: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40 -define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { +define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { entry: store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 ret void @@ -588,7 +588,7 @@ entry: ; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19 ; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64 ; HSA-VI: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40 -define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { +define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { entry: store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 ret void @@ -599,7 +599,7 @@ entry: ; MESA-GCN: s_load_dwordx2 ; MESA-GCN: buffer_store_dwordx2 ; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8 -define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { +define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { store i64 %a, i64 addrspace(1)* %out, align 8 ret void } @@ -611,7 +611,7 @@ define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { 
; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c ; MESA-GCN: buffer_store_dwordx2 ; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8 -define void @f64_kernel_arg(double addrspace(1)* %out, double %in) { +define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in) { entry: store double %in, double addrspace(1)* %out ret void @@ -621,7 +621,7 @@ entry: ; XGCN: s_load_dwordx2 ; XGCN: s_load_dwordx2 ; XGCN: buffer_store_dwordx2 -; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind { +; define amdgpu_kernel void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind { ; store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8 ; ret void ; } @@ -631,7 +631,7 @@ entry: ; SI: v_and_b32_e32 ; SI: buffer_store_byte ; SI: s_endpgm -define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { store i1 %x, i1 addrspace(1)* %out, align 1 ret void } @@ -640,7 +640,7 @@ define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { ; SI: buffer_load_ubyte ; SI: buffer_store_dword ; SI: s_endpgm -define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { %ext = zext i1 %x to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 ret void @@ -650,7 +650,7 @@ define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { ; SI: buffer_load_ubyte ; SI: buffer_store_dwordx2 ; SI: s_endpgm -define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { %ext = zext i1 %x to i64 store i64 %ext, i64 addrspace(1)* %out, align 8 ret void @@ -660,7 +660,7 @@ define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { ; SI: buffer_load_ubyte ; SI: buffer_store_dword ; SI: s_endpgm -define void @i1_arg_sext_i32(i32 addrspace(1)* 
%out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { %ext = sext i1 %x to i32 store i32 %ext, i32 addrspace(1)* %out, align 4 ret void @@ -672,7 +672,7 @@ define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { ; SI: v_ashrrev_i32 ; SI: buffer_store_dwordx2 ; SI: s_endpgm -define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { %ext = sext i1 %x to i64 store i64 %ext, i64 addrspace(1)* %out, align 8 ret void |