diff options
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll | 22 |
2 files changed, 25 insertions, 2 deletions
Summary of this patch (free text before the first "diff --git" header):
  * SIInstructions.td: add a GCNPat that selects fneg(fabs(x)) on v2f16 to
    S_OR_B32 with the mask 0x80008000 ("Set sign bit").
  * fneg-fabs.f16.ll: add s_fneg_fabs_v2f16_non_bc_src, whose fabs operand is
    produced by an fadd, and rename s_fneg_fabs_v2f16 to
    s_fneg_fabs_v2f16_bc_src.
NOTE(review): line breaks, blank context lines and the two-space indentation
inside the hunks were reconstructed from the hunk headers and LLVM formatting
convention; confirm the context lines against the tree before applying.

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 363b0e71223..27efb8e7dea 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -996,6 +996,11 @@ def : GCNPat <
   (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
 >;
 
+def : GCNPat <
+  (fneg (v2f16 (fabs v2f16:$src))),
+  (S_OR_B32 (S_MOV_B32 (i32 0x80008000)), $src) // Set sign bit
+>;
+
 /********** ================== **********/
 /********** Immediate Patterns **********/
 /********** ================== **********/
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index a4722876d3f..1ece288cd5e 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -68,8 +68,26 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa
   ret void
 }
 
+; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_non_bc_src:
+; GFX9-DAG: s_load_dword [[VAL:s[0-9]+]]
+; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x40003c00
+; GFX9: v_pk_add_f16 [[ADD:v[0-9]+]], [[VAL]], [[K]]
+; GFX9: v_or_b32_e32 [[RESULT:v[0-9]+]], 0x80008000, [[ADD]]
+
+; VI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, v{{[0-9]+}}
+define amdgpu_kernel void @s_fneg_fabs_v2f16_non_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+  %add = fadd <2 x half> %in, <half 1.0, half 2.0>
+  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %add)
+  %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
+  store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out
+  ret void
+}
+
 ; FIXME: single bit op
-; GCN-LABEL: {{^}}s_fneg_fabs_v2f16:
+
+; Combine turns this into integer op when bitcast source (from load)
+
+; GCN-LABEL: {{^}}s_fneg_fabs_v2f16_bc_src:
 ; CI: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 16, v{{[0-9]+}}
 ; CI: v_or_b32_e32 [[OR:v[0-9]+]], v{{[0-9]+}}, [[SHL]]
 ; CI: v_or_b32_e32 v{{[0-9]+}}, 0x80008000, [[OR]]
@@ -77,7 +95,7 @@ define amdgpu_kernel void @v_fneg_fabs_f16(half addrspace(1)* %out, half addrspa
 ; FIXME: Random commute
 ; VI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008000
 ; GFX9: s_or_b32 s{{[0-9]+}}, 0x80008000, s{{[0-9]+}}
-define amdgpu_kernel void @s_fneg_fabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in) {
+define amdgpu_kernel void @s_fneg_fabs_v2f16_bc_src(<2 x half> addrspace(1)* %out, <2 x half> %in) {
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %in)
   %fneg.fabs = fsub <2 x half> <half -0.0, half -0.0>, %fabs
   store <2 x half> %fneg.fabs, <2 x half> addrspace(1)* %out