summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2018-05-22 20:13:34 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2018-05-22 20:13:34 +0000
commit606bc315d66e9aa077d04b1076227263b9c971e6 (patch)
treecc9dbfd6d443e465d2bdf6bcc1d5ae423484c055
parent785acce51dc3f1d3398222d5bfa0ab2f2dfa8442 (diff)
downloadbcm5719-llvm-606bc315d66e9aa077d04b1076227263b9c971e6.tar.gz
bcm5719-llvm-606bc315d66e9aa077d04b1076227263b9c971e6.zip
AMDGPU: Fix v2f16 fneg/fabs pattern
The integer operation convertion for some reason only happens if the source is a bitcast from an integer, which happens to always be the situation when the result is loaded. Add an additional pattern for when the source operation is really an FP operation. llvm-svn: 333019
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td5
-rw-r--r--llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll22
2 files changed, 25 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 363b0e71223..27efb8e7dea 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -996,6 +996,11 @@ def : GCNPat <
(S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
>;
+def : GCNPat <
+ (fneg (v2f16 (fabs v2f16:$src))),
+ (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
+>;
+
/********** ================== **********/
/********** Immediate Patterns **********/
/********** ================== **********/
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index a4722876d3f..1ece288cd5e 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -68,8 +68,26 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa
ret void
}
+; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_non_bc_src:
+; GFX9-DAG: s_load_dword [[VAL:s[0-9]+]]
+; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x40003c00
+; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[VAL]], [[K]]
+; GFX9: v_or_b32_e32 [[RESULT:v[0-9]+]], 0x80008000, [[ADD]]
+
+; VI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
+define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+ %add = fadd <2 x half> %in, <half 1.0, half 2.0>
+ %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %add)
+ %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
+ store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
+ ret void
+}
+
; FIXME: single bit op
-; GCN-LABEL: {{^}}s_fneg_fabs_v2f16:
+
+; Combine turns this into integer op when bitcast source (from load)
+
+; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_bc_src:
; CI: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, v{{[0-9]+}}
; CI: v_or_b32_e32 [[OR:v[0-9]+]], v{{[0-9]+}}, [[SHL]]
; CI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, [[OR]]
@@ -77,7 +95,7 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa
; FIXME: Random commute
; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
; GFX9: s_or_b32 s{{[0-9]+}}, 0x80008000, s{{[0-9]+}}
-define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
%fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
OpenPOWER on IntegriCloud