From 7d88275577d2c72cc6dfbc47591adce8b25b14f0 Mon Sep 17 00:00:00 2001
From: Konstantin Zhuravlyov
Date: Fri, 13 Jan 2017 19:49:25 +0000
Subject: [AMDGPU] Implement f16 fcopysign and fcopysign(f32, f64)

Differential Revision: https://reviews.llvm.org/D28496

llvm-svn: 291954
---
 llvm/lib/Target/AMDGPU/AMDGPUInstructions.td |  6 ++++++
 llvm/lib/Target/AMDGPU/SIInstructions.td     | 31 ++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'llvm/lib/Target/AMDGPU')

Review notes (annotation only, not part of the commit; kept in the free-text
area between the "---" separator and the first "diff --git" line):

* This copy of the patch appears to have lost text during extraction: the
  first hunk header reads "multiclass BFIPatterns ;" and the leading context
  of that hunk looks shorter than its "-564,6" count implies. Indentation
  inside the hunks is reconstructed by convention, not taken from this copy.
  TODO: compare against the upstream revision before applying.
* AMDGPUInstructions.td: adds one pattern, an f32 fcopysign whose second
  operand is f64. It emits BFI_INT with (LoadImm32 0x7fffffff), $src0, and
  the sub1 half of $src1.
* SIInstructions.td: adds five patterns, each emitting V_BFI_B32 with an
  S_MOV_B32 constant as its first operand:
    - f16 result, f16 $src1: constant 0x00007fff; $src0 and $src1 are passed
      unchanged.
    - f32 result, f16 $src1: constant 0x7fffffff; $src1 goes through
      V_LSHLREV_B32_e64 with 16.
    - f64 result, f16 $src1: a REG_SEQUENCE over SReg_64 whose sub0 is sub0
      of $src0 unchanged and whose sub1 is the V_BFI_B32 of sub1 of $src0
      with $src1 passed through V_LSHLREV_B32_e64 with 16.
    - f16 result, f32 $src1: constant 0x00007fff; $src1 goes through
      V_LSHRREV_B32_e64 with 16.
    - f16 result, f64 $src1: constant 0x00007fff; sub1 of $src1 goes through
      V_LSHRREV_B32_e64 with 16.
* Presumably the constant acts as a mask that keeps the magnitude bits of
  $src0 and takes the remaining bit(s) from the last operand, with the
  16-bit shifts lining up the f16 and f32 sign positions - confirm against
  the BFI definition in the ISA documentation.
* NOTE(review): the f64/f16 pattern names SReg_64 in its REG_SEQUENCE while
  its sub1 input comes from V_BFI_B32; the f64 pattern visible as context in
  AMDGPUInstructions.td uses RC64 instead. Confirm the register class is
  intended.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 59cba636c58..606b6cea2e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -564,6 +564,12 @@ multiclass BFIPatterns ;
 
+  def : Pat <
+    (f32 (fcopysign f32:$src0, f64:$src1)),
+    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0,
+             (i32 (EXTRACT_SUBREG $src1, sub1)))
+  >;
+
   def : Pat <
     (f64 (fcopysign f64:$src0, f64:$src1)),
     (REG_SEQUENCE RC64,
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b86c0419118..7810b0d7060 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -677,6 +677,37 @@ def : Pat <
     sub1)
 >;
 
+def : Pat <
+  (fcopysign f16:$src0, f16:$src1),
+  (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0, $src1)
+>;
+
+def : Pat <
+  (fcopysign f32:$src0, f16:$src1),
+  (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), $src0,
+             (V_LSHLREV_B32_e64 (i32 16), $src1))
+>;
+
+def : Pat <
+  (fcopysign f64:$src0, f16:$src1),
+  (REG_SEQUENCE SReg_64,
+    (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
+    (V_BFI_B32 (S_MOV_B32 (i32 0x7fffffff)), (i32 (EXTRACT_SUBREG $src0, sub1)),
+               (V_LSHLREV_B32_e64 (i32 16), $src1)), sub1)
+>;
+
+def : Pat <
+  (fcopysign f16:$src0, f32:$src1),
+  (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0,
+             (V_LSHRREV_B32_e64 (i32 16), $src1))
+>;
+
+def : Pat <
+  (fcopysign f16:$src0, f64:$src1),
+  (V_BFI_B32 (S_MOV_B32 (i32 0x00007fff)), $src0,
+             (V_LSHRREV_B32_e64 (i32 16), (EXTRACT_SUBREG $src1, sub1)))
+>;
+
 def : Pat <
   (fneg f16:$src),
   (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000)))
-- 
cgit v1.2.3