Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/kernel-args.ll')
-rw-r--r--   llvm/test/CodeGen/AMDGPU/kernel-args.ll | 210
1 file changed, 102 insertions(+), 108 deletions(-)
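In short: on amdhsa (HSA-VI) targets, small integer kernel arguments are now expected to be fetched with a scalar load from the kernarg segment plus a mask or sign-extend, instead of materializing a pointer and doing a flat load. As a minimal sketch (a hypothetical reduction of the i8_arg test in the diff below; the value name and register numbers are illustrative, not checked literally by the test):

    define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
    entry:
      %ext = zext i8 %in to i32                       ; zero-extend the byte argument
      store i32 %ext, i32 addrspace(1)* %out, align 4
      ret void
    }

After this change, reading %in on HSA-VI is expected to look like:

    s_load_dword s0, s[4:5], 0x8    ; scalar load from the kernarg segment (offset 0x8)
    s_and_b32 s0, s0, 0xff          ; mask to the low byte

whereas the old checks pinned the much longer s_add_u32/s_addc_u32 + v_mov_b32_e32 + flat_load_ubyte sequence.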
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index cb97d716e38..2f7d41ccf03 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -11,12 +11,10 @@
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
 ; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
 ; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
-; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
-; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
-; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
-; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
-; FIXME: Should be using s_load_dword
-; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]{{$}}
+
+; HSA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
+; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
+
 define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
 entry:
@@ -31,13 +29,9 @@ entry:
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
 ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
-; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
-; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
-; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
-; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
-; FIXME: Should be using s_load_dword
-; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]{{$}}
+; HSA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
+; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
 define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
 entry:
   %0 = zext i8 %in to i32
@@ -50,14 +44,12 @@ entry:
 ; HSA-VI: kernarg_segment_alignment = 4
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
+
 ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
-; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
-; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
-; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
-; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
-; FIXME: Should be using s_load_dword
-; HSA-VI: flat_load_sbyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]{{$}}
+; HSA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
+; HSA-VI: s_sext_i32_i8 s{{[0-9]+}}, [[VAL]]
+; HSA-VI: flat_store_dword
 define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
 entry:
   %0 = sext i8 %in to i32
@@ -71,15 +63,13 @@ entry:
 ; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+
 ; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
 ; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
-; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
-; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
-; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
-; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
-; FIXME: Should be using s_load_dword
-; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]{{$}}
+; HSA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
+; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff{{$}}
+; HSA-VI: flat_store_dword
 define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
 entry:
   %0 = zext i16 %in to i32
@@ -94,13 +84,10 @@ entry:
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
 ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
-; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
-; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
-; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
-; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
-; FIXME: Should be using s_load_dword
-; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]{{$}}
+; HSA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
+; HSA-VI: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff{{$}}
+; HSA-VI: flat_store_dword
 define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
 entry:
   %0 = zext i16 %in to i32
@@ -115,13 +102,11 @@ entry:
 ; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 ; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
 ; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
-; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
-; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
-; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
-; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
-; FIXME: Should be using s_load_dword
-; HSA-VI: flat_load_sshort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]{{$}}
+
+; HSA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x8
+; HSA-VI: s_sext_i32_i16 s{{[0-9]+}}, [[VAL]]
+; HSA-VI: flat_store_dword
 define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
 entry:
   %0 = sext i16 %in to i32
@@ -163,10 +148,8 @@ entry:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-
-; HSA: flat_load_ushort
+; GCN: s_load_dword s
+; GCN-NOT: {{buffer|flat|global}}_load_
 define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
 entry:
   store <2 x i8> %in, <2 x i8> addrspace(1)* %out
@@ -226,15 +209,9 @@ entry:
 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-
-; MESA-VI: buffer_load_ushort
-; MESA-VI: buffer_load_ubyte
-; HSA-VI: flat_load_ushort
-; HSA-VI: flat_load_ubyte
+; GCN: s_load_dword s
+; GCN-NOT: {{buffer|flat|global}}_load_
 define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
 entry:
   store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
@@ -249,8 +226,8 @@ entry:
 ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
 ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
-; GCN-DAG: s_load_dword s
-; GCN-DAG: {{buffer|flat}}_load_ushort
+; GCN: s_load_dword s
+; GCN: s_load_dword s
 define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
 entry:
   store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
@@ -294,12 +271,8 @@ entry:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-
-; VI: s_load_dword s
+; GCN: s_load_dword s
+; GCN-NOT: {{buffer|flat|global}}_load_
 define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
 entry:
   store <4 x i8> %in, <4 x i8> addrspace(1)* %out
@@ -314,7 +287,8 @@ entry:
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
-; SI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0xb
+; SI-DAG: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xc
 ; SI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x9
 
 ; MESA-VI-DAG: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x2c
@@ -361,6 +335,7 @@ entry:
   ret void
 }
 
+; FIXME: Lots of unpack and re-pack junk on VI
 ; FUNC-LABEL: {{^}}v8i8_arg:
 ; HSA-VI: kernarg_segment_byte_size = 16
 ; HSA-VI: kernarg_segment_alignment = 4
@@ -373,16 +348,23 @@ entry:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-
-; VI: s_load_dwordx2
-; VI: s_load_dwordx2
+; SI: s_load_dword s
+; SI: s_load_dword s
+; SI: s_load_dwordx2 s
+; SI-NOT: {{buffer|flat|global}}_load
+
+; VI: s_load_dword s
+; VI: s_load_dword s
+
+; VI: v_lshlrev_b16
+; VI: v_or_b32_e32
+; VI: v_or_b32_sdwa
+; VI: v_or_b32_sdwa
+; VI: v_lshlrev_b16
+; VI: s_lshr_b32
+; VI: v_or_b32_sdwa
+; VI: v_or_b32_sdwa
 define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
 entry:
   store <8 x i8> %in, <8 x i8> addrspace(1)* %out
@@ -401,9 +383,13 @@ entry:
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
+; SI: s_load_dword s
+; SI: s_load_dword s
+; SI: s_load_dword s
+; SI: s_load_dword s
 ; SI: s_load_dwordx2
-; SI: s_load_dwordx2
-; SI: s_load_dwordx2
+; SI-NOT: {{buffer|flat|global}}_load
+
 ; VI: s_load_dwordx2
 ; VI: s_load_dword s
@@ -454,6 +440,8 @@ entry:
   ret void
 }
 
+; FIXME: Pack/repack on VI
+
 ; FUNC-LABEL: {{^}}v16i8_arg:
 ; HSA-VI: kernarg_segment_byte_size = 32
 ; HSA-VI: kernarg_segment_alignment = 4
@@ -474,26 +462,33 @@ entry:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
-; SI: buffer_load_ubyte
+; SI: s_load_dword s
+; SI: s_load_dword s
+; SI: s_load_dword s
+; SI: s_load_dword s
+; SI: s_load_dwordx2
+; SI-NOT: {{buffer|flat|global}}_load
 
-; VI: s_load_dwordx2
-; VI: s_load_dwordx2
-; VI: s_load_dwordx2
+
+; VI: s_load_dword s
+; VI: s_load_dword s
+; VI: s_load_dword s
+; VI: s_load_dword s
+
+; VI: s_lshr_b32
+; VI: v_lshlrev_b16
+; VI: s_lshr_b32
+; VI: s_lshr_b32
+; VI: v_or_b32_sdwa
+; VI: v_or_b32_sdwa
+; VI: v_lshlrev_b16
+; VI: v_lshlrev_b16
+; VI: v_or_b32_sdwa
+; VI: v_or_b32_sdwa
+; VI: v_lshlrev_b16
+; VI: v_lshlrev_b16
+; VI: v_or_b32_sdwa
+; VI: v_or_b32_sdwa
 define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
 entry:
   store <16 x i8> %in, <16 x i8> addrspace(1)* %out
@@ -508,6 +503,7 @@ entry:
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
+
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
@@ -524,9 +520,13 @@ entry:
 ; SI: s_load_dword s
 ; SI: s_load_dword s
 ; SI: s_load_dword s
-; SI: s_load_dwordx2
-; SI: s_load_dwordx2
-; SI: s_load_dwordx2
+; SI: s_load_dword s
+; SI: s_load_dword s
+; SI: s_load_dword s
+; SI: s_load_dword s
+
+; SI-NOT: {{buffer|flat|global}}_load
+
 ; VI: s_load_dword s
 ; VI: s_load_dword s
@@ -634,10 +634,9 @@ entry:
 ; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
-; SI: buffer_load_ubyte
-; SI: v_and_b32_e32
-; SI: buffer_store_byte
-; SI: s_endpgm
+; GCN: s_load_dword s
+; GCN: s_and_b32
+; GCN: {{buffer|flat}}_store_byte
 define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
   store i1 %x, i1 addrspace(1)* %out, align 1
   ret void
 }
@@ -647,9 +646,8 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
 ; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
+; GCN: s_load_dword
+; GCN: buffer_store_dword
 define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
   %ext = zext i1 %x to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -660,9 +658,8 @@ define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
 ; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
-; SI: buffer_load_ubyte
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
+; GCN: s_load_dword s
+; GCN: {{buffer|flat}}_store_dwordx2
 define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
   %ext = zext i1 %x to i64
   store i64 %ext, i64 addrspace(1)* %out, align 8
@@ -673,9 +670,8 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi
 ; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
-; SI: buffer_load_ubyte
-; SI: buffer_store_dword
-; SI: s_endpgm
+; GCN: s_load_dword
+; GCN: {{buffer|flat}}_store_dword
 define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
   %ext = sext i1 %x to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
@@ -686,11 +682,9 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi
 ; HSA-VI: kernarg_segment_byte_size = 12
 ; HSA-VI: kernarg_segment_alignment = 4
-; SI: buffer_load_ubyte
-; SI: v_bfe_i32
-; SI: v_ashrrev_i32
-; SI: buffer_store_dwordx2
-; SI: s_endpgm
+; GCN: s_load_dword
+; GCN: s_bfe_i64
+; GCN: {{buffer|flat}}_store_dwordx2
 define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
   %ext = sext i1 %x to i64
   store i64 %ext, i64 addrspace(1)* %out, align 8
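For reference, this test is driven by llc + FileCheck: each prefix above (SI, MESA-VI, HSA-VI, GCN, EG, ...) corresponds to a RUN line at the top of kernel-args.ll, which this diff does not touch. A sketch of the kind of invocation that exercises the HSA-VI checks (the exact RUN lines, CPU, and prefix sets in the file may differ):

    ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs \
    ; RUN:   | FileCheck -enable-var-scope -check-prefixes=GCN,VI,HSA-VI,FUNC %s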