diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-05-22 06:32:10 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-05-22 06:32:10 +0000 |
commit | 1349a04ef5f594dda705ec80474dda4837f26dba (patch) | |
tree | c33cdd2eb97c0a6c41289054e0d700cfc46ac2fa /llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll | |
parent | bfd08534b020b0feb420766a8d2b3fb0f295f551 (diff) | |
download | bcm5719-llvm-1349a04ef5f594dda705ec80474dda4837f26dba.tar.gz bcm5719-llvm-1349a04ef5f594dda705ec80474dda4837f26dba.zip |
AMDGPU: Make v2i16/v2f16 legal on VI
This usually results in better code. Fixes using
inline asm with short2, and also fixes having a different
ABI for function parameters between VI and gfx9.
Partially cleans up the mess used for lowering of the d16
operations. Making v4f16 legal will help clean this up more,
but this requires additional work.
llvm-svn: 332953
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll | 24 |
1 files changed, 10 insertions, 14 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll index b43271c1bd0..a4722876d3f 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -73,12 +73,9 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa ; CI: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, v{{[0-9]+}} ; CI: v_or_b32_e32 [[OR:v[0-9]+]], v{{[0-9]+}}, [[SHL]] ; CI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, [[OR]] -; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} -; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]] -; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; CIVI: flat_store_dword +; FIXME: Random commute +; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000 ; GFX9: s_or_b32 s{{[0-9]+}}, 0x80008000, s{{[0-9]+}} define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) { %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) @@ -95,14 +92,13 @@ define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x ; CI: v_or_b32_e32 [[OR1:v[0-9]+]], v{{[0-9]+}}, [[SHL1]] ; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], [[OR0]] ; CI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], [[OR1]] -; VI: s_mov_b32 [[MASK:s[0-9]+]], 0x8000{{$}} -; VI: v_mov_b32_e32 [[VMASK:v[0-9]+]], [[MASK]] -; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], -; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VMASK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI: v_or_b32_e32 v{{[0-9]+}}, [[MASK]], - -; GFX9: s_mov_b32 [[MASK:s[0-9]+]], 0x80008000 + +; FIXME: Random commute +; GFX89: s_mov_b32 [[MASK:s[0-9]+]], 0x80008000 + +; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[MASK]] +; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[MASK]] + ; GFX9: s_or_b32 s{{[0-9]+}}, [[MASK]], s{{[0-9]+}} ; GFX9: s_or_b32 s{{[0-9]+}}, [[MASK]], s{{[0-9]+}} @@ -120,7 +116,7 @@ define amdgpu_kernel void @fneg_fabs_v4f16(<4 x half> addrspace(1)* %out, <4 x h ; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} ; CI: v_mul_f32_e32 v{{[0-9]+}}, 4.0, v{{[0-9]+}} -; VI: v_mul_f16_e64 v{{[0-9]+}}, -|v{{[0-9]+}}|, 4.0 +; VI: v_mul_f16_e64 v{{[0-9]+}}, -|s{{[0-9]+}}|, 4.0 ; VI: v_mul_f16_sdwa v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9: s_and_b32 [[ABS:s[0-9]+]], s{{[0-9]+}}, 0x7fff7fff |