diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-05-22 20:13:34 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-05-22 20:13:34 +0000 |
commit | 606bc315d66e9aa077d04b1076227263b9c971e6 (patch) | |
tree | cc9dbfd6d443e465d2bdf6bcc1d5ae423484c055 | |
parent | 785acce51dc3f1d3398222d5bfa0ab2f2dfa8442 (diff) | |
download | bcm5719-llvm-606bc315d66e9aa077d04b1076227263b9c971e6.tar.gz bcm5719-llvm-606bc315d66e9aa077d04b1076227263b9c971e6.zip |
AMDGPU: Fix v2f16 fneg/fabs pattern
The integer operation conversion for some reason only happens
if the source is a bitcast from an integer, which happens to
always be the situation when the result is loaded. Add
an additional pattern for when the source operation is really
an FP operation.
llvm-svn: 333019
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll | 22 |
2 files changed, 25 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 363b0e71223..27efb8e7dea 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -996,6 +996,11 @@ def : GCNPat < (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit >; +def : GCNPat < + (fneg (v2f16 (fabs v2f16:$src))), + (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit +>; + /********** ================== **********/ /********** Immediate Patterns **********/ /********** ================== **********/ diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll index a4722876d3f..1ece288cd5e 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll @@ -68,8 +68,26 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa ret void } +; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_non_bc_src: +; GFX9-DAG: s_load_dword [[VAL:s[0-9]+]] +; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x40003c00 +; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[VAL]], [[K]] +; GFX9: v_or_b32_e32 [[RESULT:v[0-9]+]], 0x80008000, [[ADD]] + +; VI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}} +define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) { + %add = fadd <2 x half> %in, <half 1.0, half 2.0> + %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %add) + %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs + store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out + ret void +} + ; FIXME: single bit op -; GCN-LABEL: {{^}}s_fneg_fabs_v2f16: + +; Combine turns this into integer op when bitcast source (from load) + +; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_bc_src: ; CI: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, v{{[0-9]+}} ; CI: v_or_b32_e32 [[OR:v[0-9]+]], v{{[0-9]+}}, [[SHL]] ; CI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, [[OR]] @@ -77,7 +95,7 @@ define amdgpu_kernel void 
@v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa ; FIXME: Random commute ; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000 ; GFX9: s_or_b32 s{{[0-9]+}}, 0x80008000, s{{[0-9]+}} -define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) { +define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) { %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in) %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out |