diff options
Diffstat (limited to 'llvm/lib/Target/X86')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFMA.td | 80 |
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 8 |
2 files changed, 49 insertions, 39 deletions
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td index 453dcd83df1..15466c2978f 100644 --- a/llvm/lib/Target/X86/X86InstrFMA.td +++ b/llvm/lib/Target/X86/X86InstrFMA.td @@ -290,8 +290,7 @@ multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231, } multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, - string OpStr, Intrinsic IntF32, Intrinsic IntF64, - SDNode OpNode> { + string OpStr, SDNode OpNodeIntrin, SDNode OpNode> { let ExeDomain = SSEPackedSingle in defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode, FR32, f32mem>, @@ -309,43 +308,44 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, // This is because src1 is tied to dest, and the scalar intrinsics // require the pass-through values to come from the first source // operand, not the second. - // TODO: Use AVX512 instructions when possible. - let Predicates = [HasFMA] in { - def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3), + let Predicates = [HasFMA, NoAVX512] in { + def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, VR128:$src2, VR128:$src3)), (!cast<Instruction>(NAME#"213SSr_Int") VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3), + def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, VR128:$src2, VR128:$src3)), (!cast<Instruction>(NAME#"213SDr_Int") VR128:$src1, VR128:$src2, VR128:$src3)>; - def : Pat<(IntF32 VR128:$src1, VR128:$src2, sse_load_f32:$src3), + def : Pat<(v4f32 (OpNodeIntrin VR128:$src1, VR128:$src2, + sse_load_f32:$src3)), (!cast<Instruction>(NAME#"213SSm_Int") VR128:$src1, VR128:$src2, sse_load_f32:$src3)>; - def : Pat<(IntF64 VR128:$src1, VR128:$src2, sse_load_f64:$src3), + def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, VR128:$src2, + sse_load_f64:$src3)), (!cast<Instruction>(NAME#"213SDm_Int") VR128:$src1, VR128:$src2, sse_load_f64:$src3)>; - def : Pat<(IntF32 VR128:$src1, sse_load_f32:$src3, VR128:$src2), + def : Pat<(v4f32 (OpNodeIntrin 
VR128:$src1, sse_load_f32:$src3, + VR128:$src2)), (!cast<Instruction>(NAME#"132SSm_Int") VR128:$src1, VR128:$src2, sse_load_f32:$src3)>; - def : Pat<(IntF64 VR128:$src1, sse_load_f64:$src3, VR128:$src2), + def : Pat<(v2f64 (OpNodeIntrin VR128:$src1, sse_load_f64:$src3, + VR128:$src2)), (!cast<Instruction>(NAME#"132SDm_Int") VR128:$src1, VR128:$src2, sse_load_f64:$src3)>; } } -defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, - int_x86_fma_vfmadd_sd, X86Fmadd>, VEX_LIG; -defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", int_x86_fma_vfmsub_ss, - int_x86_fma_vfmsub_sd, X86Fmsub>, VEX_LIG; +defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadds1, X86Fmadd>, VEX_LIG; +defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsubs1, X86Fmsub>, VEX_LIG; -defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", int_x86_fma_vfnmadd_ss, - int_x86_fma_vfnmadd_sd, X86Fnmadd>, VEX_LIG; -defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", int_x86_fma_vfnmsub_ss, - int_x86_fma_vfnmsub_sd, X86Fnmsub>, VEX_LIG; +defm VFNMADD : fma3s<0x9D, 0xAD, 0xBD, "vfnmadd", X86Fnmadds1, X86Fnmadd>, + VEX_LIG; +defm VFNMSUB : fma3s<0x9F, 0xAF, 0xBF, "vfnmsub", X86Fnmsubs1, X86Fnmsub>, + VEX_LIG; //===----------------------------------------------------------------------===// @@ -385,26 +385,28 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in } multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop, - ComplexPattern mem_cpat, Intrinsic Int> { + ValueType VT, ComplexPattern mem_cpat, SDNode OpNode> { let isCodeGenOnly = 1 in { def rr_Int : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, VEX_LIG; + (VT (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, VEX_W, + VEX_LIG; def rm_Int : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, 
memop:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, - mem_cpat:$src3))]>, VEX_W, VEX_LIG; + [(set VR128:$dst, (VT (OpNode VR128:$src1, VR128:$src2, + mem_cpat:$src3)))]>, VEX_W, VEX_LIG; def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, memop:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, - (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG; + (VT (OpNode VR128:$src1, mem_cpat:$src2, VR128:$src3)))]>, + VEX_LIG; let hasSideEffects = 0 in def rr_Int_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, VR128:$src3), @@ -475,19 +477,19 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { let ExeDomain = SSEPackedSingle in { // Scalar Instructions defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, - fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, - int_x86_fma_vfmadd_ss>; + fma4s_int<0x6A, "vfmaddss", ssmem, v4f32, sse_load_f32, + X86Fmadds1>; defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, - fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, - int_x86_fma_vfmsub_ss>; + fma4s_int<0x6E, "vfmsubss", ssmem, v4f32, sse_load_f32, + X86Fmsubs1>; defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, X86Fnmadd, loadf32>, - fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, - int_x86_fma_vfnmadd_ss>; + fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32, sse_load_f32, + X86Fnmadds1>; defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, X86Fnmsub, loadf32>, - fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, - int_x86_fma_vfnmsub_ss>; + fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32, sse_load_f32, + X86Fnmsubs1>; // Packed Instructions defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, loadv4f32, loadv8f32>; @@ -506,19 +508,19 @@ let ExeDomain = SSEPackedSingle in { 
let ExeDomain = SSEPackedDouble in { // Scalar Instructions defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, - fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfmadd_sd>; + fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64, sse_load_f64, + X86Fmadds1>; defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, - fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfmsub_sd>; + fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64, sse_load_f64, + X86Fmsubs1>; defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, X86Fnmadd, loadf64>, - fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfnmadd_sd>; + fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64, sse_load_f64, + X86Fnmadds1>; defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, X86Fnmsub, loadf64>, - fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfnmsub_sd>; + fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64, sse_load_f64, + X86Fnmsubs1>; // Packed Instructions defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, loadv2f64, loadv4f64>; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 6132234c006..128d0942a73 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1468,6 +1468,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, ISD::FMA, 0), X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, ISD::FMA, 0), X86_INTRINSIC_DATA(fma_vfmadd_ps_256, INTR_TYPE_3OP, ISD::FMA, 0), + X86_INTRINSIC_DATA(fma_vfmadd_sd, INTR_TYPE_3OP, X86ISD::FMADDS1, 0), + X86_INTRINSIC_DATA(fma_vfmadd_ss, INTR_TYPE_3OP, X86ISD::FMADDS1, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_pd, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_pd_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), X86_INTRINSIC_DATA(fma_vfmaddsub_ps, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0), @@ -1476,6 +1478,8 @@ 
static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(fma_vfmsub_pd_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0), X86_INTRINSIC_DATA(fma_vfmsub_ps, INTR_TYPE_3OP, X86ISD::FMSUB, 0), X86_INTRINSIC_DATA(fma_vfmsub_ps_256, INTR_TYPE_3OP, X86ISD::FMSUB, 0), + X86_INTRINSIC_DATA(fma_vfmsub_sd, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0), + X86_INTRINSIC_DATA(fma_vfmsub_ss, INTR_TYPE_3OP, X86ISD::FMSUBS1, 0), X86_INTRINSIC_DATA(fma_vfmsubadd_pd, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(fma_vfmsubadd_pd_256, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0), X86_INTRINSIC_DATA(fma_vfmsubadd_ps, INTR_TYPE_3OP, X86ISD::FMSUBADD, 0), @@ -1484,10 +1488,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(fma_vfnmadd_pd_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0), X86_INTRINSIC_DATA(fma_vfnmadd_ps, INTR_TYPE_3OP, X86ISD::FNMADD, 0), X86_INTRINSIC_DATA(fma_vfnmadd_ps_256, INTR_TYPE_3OP, X86ISD::FNMADD, 0), + X86_INTRINSIC_DATA(fma_vfnmadd_sd, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0), + X86_INTRINSIC_DATA(fma_vfnmadd_ss, INTR_TYPE_3OP, X86ISD::FNMADDS1, 0), X86_INTRINSIC_DATA(fma_vfnmsub_pd, INTR_TYPE_3OP, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(fma_vfnmsub_pd_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(fma_vfnmsub_ps, INTR_TYPE_3OP, X86ISD::FNMSUB, 0), X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0), + X86_INTRINSIC_DATA(fma_vfnmsub_sd, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0), + X86_INTRINSIC_DATA(fma_vfnmsub_ss, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0), X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0), X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ), X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE), |

