AMDGPU: Fix v2f16 fneg/fabs pattern

The integer operation conversion, for some reason, only happens if the source is a bitcast from an integer, which happens to always be the situation when the result is loaded. Add an additional pattern for when the source operation is really an FP operation. llvm-svn: 333019
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2018-05-22 20:13:34 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2018-05-22 20:13:34 +0000
commit: 606bc315d66e9aa077d04b1076227263b9c971e6 (patch)
tree: cc9dbfd6d443e465d2bdf6bcc1d5ae423484c055
parent: 785acce51dc3f1d3398222d5bfa0ab2f2dfa8442 (diff)
download: bcm5719-llvm-606bc315d66e9aa077d04b1076227263b9c971e6.tar.gz
bcm5719-llvm-606bc315d66e9aa077d04b1076227263b9c971e6.zip
2 files changed, 25 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 363b0e71223..27efb8e7dea 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -996,6 +996,11 @@ def : GCNPat <
   (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
 >;
 
+def : GCNPat <
+  (fneg (v2f16 (fabs v2f16:$src))),
+  (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
+>;
+
 /********** ================== **********/
 /********** Immediate Patterns **********/
 /********** ================== **********/
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index a4722876d3f..1ece288cd5e 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -68,8 +68,26 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa
   ret void
 }
 
+; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_non_bc_src:
+; GFX9-DAG: s_load_dword [[VAL:s[0-9]+]]
+; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x40003c00
+; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[VAL]], [[K]]
+; GFX9: v_or_b32_e32 [[RESULT:v[0-9]+]], 0x80008000, [[ADD]]
+
+; VI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
+define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+  %add = fadd <2 x half> %in, <half 1.0, half 2.0>
+  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %add)
+  %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
+  store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
+  ret void
+}
+
 ; FIXME: single bit op
-; GCN-LABEL: {{^}}s_fneg_fabs_v2f16:
+
+; Combine turns this into integer op when bitcast source (from load)
+
+; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_bc_src:
 ; CI: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, v{{[0-9]+}}
 ; CI: v_or_b32_e32 [[OR:v[0-9]+]], v{{[0-9]+}}, [[SHL]]
 ; CI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, [[OR]]
@@ -77,7 +95,7 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa
 ; FIXME: Random commute
 ; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
 ; GFX9: s_or_b32 s{{[0-9]+}}, 0x80008000, s{{[0-9]+}}
-define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
   %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
   store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2018-05-22 20:13:34 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2018-05-22 20:13:34 +0000
commit	606bc315d66e9aa077d04b1076227263b9c971e6 (patch)
tree	cc9dbfd6d443e465d2bdf6bcc1d5ae423484c055
parent	785acce51dc3f1d3398222d5bfa0ab2f2dfa8442 (diff)
download	bcm5719-llvm-606bc315d66e9aa077d04b1076227263b9c971e6.tar.gz bcm5719-llvm-606bc315d66e9aa077d04b1076227263b9c971e6.zip