Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll                |  16
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/half.ll                         |  34
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/kernel-args.ll                  | 180
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll |   6
4 files changed, 94 insertions, 142 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index 1ece288cd5e..d7141efc82d 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -88,11 +88,9 @@ define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(<2 x half> addrspace(1)*
 
 ; Combine turns this into integer op when bitcast source (from load)
 ; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_bc_src:
-; CI: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, v{{[0-9]+}}
-; CI: v_or_b32_e32 [[OR:v[0-9]+]], v{{[0-9]+}}, [[SHL]]
-; CI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, [[OR]]
 
 ; FIXME: Random commute
+; CI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
 ; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
 ; GFX9: s_or_b32 s{{[0-9]+}}, 0x80008000, s{{[0-9]+}}
 define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
@@ -103,16 +101,12 @@ define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(<2 x half> addrspace(1)* %ou
 }
 
 ; GCN-LABEL: {{^}}fneg_fabs_v4f16:
-; CI: s_mov_b32 [[MASK:s[0-9]+]], 0x80008000
-; CI: v_lshlrev_b32_e32 [[SHL0:v[0-9]+]], 16, v{{[0-9]+}}
-; CI: v_or_b32_e32 [[OR0:v[0-9]+]], v{{[0-9]+}}, [[SHL0]]
-; CI: v_lshlrev_b32_e32 [[SHL1:v[0-9]+]], 16, v{{[0-9]+}}
-; CI: v_or_b32_e32 [[OR1:v[0-9]+]], v{{[0-9]+}}, [[SHL1]]
-; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], [[OR0]]
-; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], [[OR1]]
 
 ; FIXME: Random commute
-; GFX89: s_mov_b32 [[MASK:s[0-9]+]], 0x80008000
+; GCN: s_mov_b32 [[MASK:s[0-9]+]], 0x80008000
+
+; CI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[MASK]]
+; CI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[MASK]]
 ; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[MASK]]
 ; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[MASK]]
 
diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll
index a042700edf8..f31b2ab5563 100644
--- a/llvm/test/CodeGen/AMDGPU/half.ll
+++ b/llvm/test/CodeGen/AMDGPU/half.ll
@@ -13,17 +13,10 @@ define amdgpu_kernel void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
   ret void
 }
 
-; FIXME: Should always be the same
 ; GCN-LABEL: {{^}}load_v2f16_arg:
-; SI-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
-; SI-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
-; SI: v_lshlrev_b32_e32 [[HI:v[0-9]+]], 16, [[V1]]
-; SI: v_or_b32_e32 [[PACKED:v[0-9]+]], [[V0]], [[HI]]
-; SI: buffer_store_dword [[PACKED]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-
-; VI: s_load_dword [[ARG:s[0-9]+]]
-; VI: v_mov_b32_e32 [[V_ARG:v[0-9]+]], [[ARG]]
-; VI: buffer_store_dword [[V_ARG]]
+; GCN: s_load_dword [[ARG:s[0-9]+]]
+; GCN: v_mov_b32_e32 [[V_ARG:v[0-9]+]], [[ARG]]
+; GCN: buffer_store_dword [[V_ARG]]
 define amdgpu_kernel void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
   store <2 x half> %arg, <2 x half> addrspace(1)* %out
   ret void
@@ -31,8 +24,8 @@ define amdgpu_kernel void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x ha
 
 ; GCN-LABEL: {{^}}load_v3f16_arg:
 ; GCN: buffer_load_ushort
-; GCN: buffer_load_ushort
-; GCN: buffer_load_ushort
+; GCN: s_load_dword s
+
 ; GCN-NOT: buffer_load
 ; GCN-DAG: buffer_store_dword
 ; GCN-DAG: buffer_store_short
@@ -43,19 +36,14 @@ define amdgpu_kernel void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x ha
   ret void
 }
 
-; GCN-LABEL: {{^}}load_v4f16_arg:
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_store_dwordx2
 
 ; FIXME: Why not one load?
-; VI-DAG: s_load_dword [[ARG0_LO:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
-; VI-DAG: s_load_dword [[ARG0_HI:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x30
-; VI-DAG: v_mov_b32_e32 v[[V_ARG0_LO:[0-9]+]], [[ARG0_LO]]
-; VI-DAG: v_mov_b32_e32 v[[V_ARG0_HI:[0-9]+]], [[ARG0_HI]]
-; VI: buffer_store_dwordx2 v{{\[}}[[V_ARG0_LO]]:[[V_ARG0_HI]]{{\]}}
+; GCN-LABEL: {{^}}load_v4f16_arg:
+; GCN-DAG: s_load_dword [[ARG0_LO:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[ARG0_HI:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 v[[V_ARG0_LO:[0-9]+]], [[ARG0_LO]]
+; GCN-DAG: v_mov_b32_e32 v[[V_ARG0_HI:[0-9]+]], [[ARG0_HI]]
+; GCN: buffer_store_dwordx2 v{{\[}}[[V_ARG0_LO]]:[[V_ARG0_HI]]{{\]}}
 define amdgpu_kernel void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
   store <4 x half> %arg, <4 x half> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index f51366f2665..cb97d716e38 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -162,10 +162,11 @@ entry:
 
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
+
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+
+; HSA: flat_load_ushort
 define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
 entry:
   store <2 x i8> %in, <2 x i8> addrspace(1)* %out
@@ -179,10 +180,9 @@ entry:
 
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-
-; VI: s_load_dword s
+; SI: s_load_dword s{{[0-9]+}}, s[0:1], 0xb
+; MESA-VI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x2c
+; HSA-VI: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x8
 define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
 entry:
   store <2 x i16> %in, <2 x i16> addrspace(1)* %out
@@ -226,11 +226,14 @@ entry:
 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
 ; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+
+; MESA-VI: buffer_load_ushort
+; MESA-VI: buffer_load_ubyte
+
+; HSA-VI: flat_load_ushort
 ; HSA-VI: flat_load_ubyte
 define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
 entry:
@@ -245,12 +248,9 @@ entry:
 ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
 ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
 ; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
-; MESA-GCN: buffer_load_ushort
-; MESA-GCN: buffer_load_ushort
-; MESA-GCN: buffer_load_ushort
-; HSA-VI: flat_load_ushort
-; HSA-VI: flat_load_ushort
-; HSA-VI: flat_load_ushort
+
+; GCN-DAG: s_load_dword s
+; GCN-DAG: {{buffer|flat}}_load_ushort
 define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
 entry:
   store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
@@ -293,14 +293,13 @@ entry:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
+
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+
+; VI: s_load_dword s
 define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
 entry:
   store <4 x i8> %in, <4 x i8> addrspace(1)* %out
@@ -315,13 +314,14 @@ entry:
 
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
+; SI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0xb
+; SI-DAG: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x9
 
-; VI: s_load_dword s
-; VI: s_load_dword s
+; MESA-VI-DAG: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x2c
+; MESA-VI-DAG: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x30
+
+; HSA-VI-DAG: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x8
+; HSA-VI-DAG: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xc
 define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
 entry:
   store <4 x i16> %in, <4 x i16> addrspace(1)* %out
@@ -372,21 +372,17 @@ entry:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; HSA-GCN: float_load_ubyte
-; HSA-GCN: float_load_ubyte
-; HSA-GCN: float_load_ubyte
-; HSA-GCN: float_load_ubyte
-; HSA-GCN: float_load_ubyte
-; HSA-GCN: float_load_ubyte
-; HSA-GCN: float_load_ubyte
-; HSA-GCN: float_load_ubyte
+
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+
+; VI: s_load_dwordx2
+; VI: s_load_dwordx2
 define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
 entry:
   store <8 x i8> %in, <8 x i8> addrspace(1)* %out
@@ -405,15 +401,11 @@ entry:
 
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
 
+; VI: s_load_dwordx2
 ; VI: s_load_dword s
 ; VI: s_load_dword s
 ; VI: s_load_dword s
@@ -481,38 +473,27 @@ entry:
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
 ; EG: VTX_READ_8
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; MESA-GCN: buffer_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
-; HSA-VI: flat_load_ubyte
+
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+
+; VI: s_load_dwordx2
+; VI: s_load_dwordx2
+; VI: s_load_dwordx2
 define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
 entry:
   store <16 x i8> %in, <16 x i8> addrspace(1)* %out
@@ -539,22 +520,13 @@ entry:
 
 ; EG: VTX_READ_16
 ; EG: VTX_READ_16
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
-; SI: buffer_load_ushort
+; SI: s_load_dword s
+; SI: s_load_dword s
+; SI: s_load_dword s
+; SI: s_load_dword s
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
 
 ; VI: s_load_dword s
 ; VI: s_load_dword s
diff --git a/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll b/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
index 601aca48e1e..f2f845b86db 100644
--- a/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/reduce-store-width-alignment.ll
@@ -39,10 +39,8 @@ define amdgpu_kernel void @store_v4i32_as_v2i64_align_4(<2 x i64> addrspace(3)*
 }
 
 ; GCN-LABEL: {{^}}store_v4i16_as_v2i32_align_4:
-; GCN: buffer_load_ushort
-; GCN: buffer_load_ushort
-; GCN: buffer_load_ushort
-; GCN: buffer_load_ushort
+; GCN: s_load_dword s
+; GCN: s_load_dwordx2 s
 ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1{{$}}
 define amdgpu_kernel void @store_v4i16_as_v2i32_align_4(<2 x i32> addrspace(3)* align 4 %out, <4 x i16> %x) #0 {
   %x.bc = bitcast <4 x i16> %x to <2 x i32>