Diffstat (limited to 'llvm/lib/Target')
 llvm/lib/Target/X86/X86ISelLowering.cpp      | 12
 llvm/lib/Target/X86/X86ISelLowering.h        |  3
 llvm/lib/Target/X86/X86InstrFMA.td           | 44
 llvm/lib/Target/X86/X86InstrFormats.td       | 10
 llvm/lib/Target/X86/X86InstrFragmentsSIMD.td |  6
 llvm/lib/Target/X86/X86InstrInfo.td          |  1
 llvm/lib/Target/X86/X86IntrinsicsInfo.h      |  2
 llvm/lib/Target/X86/X86Subtarget.h           |  2
 8 files changed, 56 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9985b727c0e..2163efd30aa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25169,6 +25169,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FNMADDS3_RND: return "X86ISD::FNMADDS3_RND";
   case X86ISD::FMSUBS3_RND: return "X86ISD::FMSUBS3_RND";
   case X86ISD::FNMSUBS3_RND: return "X86ISD::FNMSUBS3_RND";
+  case X86ISD::FMADD4S: return "X86ISD::FMADD4S";
+  case X86ISD::FNMADD4S: return "X86ISD::FNMADD4S";
+  case X86ISD::FMSUB4S: return "X86ISD::FMSUB4S";
+  case X86ISD::FNMSUB4S: return "X86ISD::FNMSUB4S";
   case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H";
   case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L";
   case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
@@ -35724,6 +35728,13 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
     case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3_RND; break;
     case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3_RND; break;
     }
+  } else if (N->getOpcode() == X86ISD::FMADD4S) {
+    switch (NewOpcode) {
+    case ISD::FMA: NewOpcode = X86ISD::FMADD4S; break;
+    case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUB4S; break;
+    case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD4S; break;
+    case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB4S; break;
+    }
   } else {
     llvm_unreachable("Unexpected opcode!");
   }
@@ -37092,6 +37103,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::FMADDS3_RND:
   case X86ISD::FMADDS1:
   case X86ISD::FMADDS3:
+  case X86ISD::FMADD4S:
   case ISD::FMA: return combineFMA(N, DAG, Subtarget);
   case X86ISD::FMADDSUB_RND:
   case X86ISD::FMSUBADD_RND:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 3c831001e9a..61b03be52a9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -505,6 +505,9 @@ namespace llvm {
       FMADDSUB_RND, FMSUBADD_RND,
+      // FMA4 specific scalar intrinsics bits that zero the non-scalar bits.
+      FMADD4S, FNMADD4S, FMSUB4S, FNMSUB4S,
+
       // Scalar intrinsic FMA.
       FMADDS1, FMADDS3, FNMADDS1, FNMADDS3,
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td
index 753351b7dc4..dd6a61ddc3b 100644
--- a/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/llvm/lib/Target/X86/X86InstrFMA.td
@@ -253,18 +253,18 @@ let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1,
                    hasSideEffects = 0 in
 multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
                         Operand memopr, RegisterClass RC> {
-  def r_Int : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
-              (ins RC:$src1, RC:$src2, RC:$src3),
-              !strconcat(OpcodeStr,
-                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-              []>;
+  def r_Int : FMA3S_Int<opc, MRMSrcReg, (outs RC:$dst),
+              (ins RC:$src1, RC:$src2, RC:$src3),
+              !strconcat(OpcodeStr,
+                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+              []>;
   let mayLoad = 1 in
-  def m_Int : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
-              (ins RC:$src1, RC:$src2, memopr:$src3),
-              !strconcat(OpcodeStr,
-                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-              []>;
+  def m_Int : FMA3S_Int<opc, MRMSrcMem, (outs RC:$dst),
+              (ins RC:$src1, RC:$src2, memopr:$src3),
+              !strconcat(OpcodeStr,
+                         "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+              []>;
 }
 // The FMA 213 form is created for lowering of scalar FMA intrinscis
@@ -385,20 +385,20 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
 multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
                      ValueType VT, ComplexPattern mem_cpat, SDNode OpNode> {
 let isCodeGenOnly = 1 in {
-  def rr_Int : FMA4S<opc, MRMSrcRegOp4, (outs VR128:$dst),
+  def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2, VR128:$src3),
               !strconcat(OpcodeStr,
               "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set VR128:$dst,
                 (VT (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, VEX_W, VEX_LIG;
-  def rm_Int : FMA4S<opc, MRMSrcMemOp4, (outs VR128:$dst),
+  def rm_Int : FMA4S_Int<opc, MRMSrcMemOp4, (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2, memop:$src3),
               !strconcat(OpcodeStr,
               "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set VR128:$dst, (VT (OpNode VR128:$src1, VR128:$src2, mem_cpat:$src3)))]>, VEX_W, VEX_LIG;
-  def mr_Int : FMA4S<opc, MRMSrcMem, (outs VR128:$dst),
+  def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
               (ins VR128:$src1, memop:$src2, VR128:$src3),
               !strconcat(OpcodeStr,
               "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -406,7 +406,7 @@ let isCodeGenOnly = 1 in {
               (VT (OpNode VR128:$src1, mem_cpat:$src2, VR128:$src3)))]>, VEX_LIG;
   let hasSideEffects = 0 in
-  def rr_Int_REV : FMA4S<opc, MRMSrcReg, (outs VR128:$dst),
+  def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
               (ins VR128:$src1, VR128:$src2, VR128:$src3),
               !strconcat(OpcodeStr,
               "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -476,18 +476,18 @@ let ExeDomain = SSEPackedSingle in {
   // Scalar Instructions
   defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
                    fma4s_int<0x6A, "vfmaddss", ssmem, v4f32, sse_load_f32,
-                             X86Fmadds1>;
+                             X86Fmadd4s>;
   defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
                    fma4s_int<0x6E, "vfmsubss", ssmem, v4f32, sse_load_f32,
-                             X86Fmsubs1>;
+                             X86Fmsub4s>;
   defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, X86Fnmadd, loadf32>,
                     fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32, sse_load_f32,
-                              X86Fnmadds1>;
+                              X86Fnmadd4s>;
   defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, X86Fnmsub, loadf32>,
                     fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32, sse_load_f32,
-                              X86Fnmsubs1>;
+                              X86Fnmsub4s>;
   // Packed Instructions
   defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, loadv4f32, loadv8f32>;
@@ -507,18 +507,18 @@ let ExeDomain = SSEPackedDouble in {
   // Scalar Instructions
   defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
                    fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64, sse_load_f64,
-                             X86Fmadds1>;
+                             X86Fmadd4s>;
   defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
                    fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64, sse_load_f64,
-                             X86Fmsubs1>;
+                             X86Fmsub4s>;
   defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, X86Fnmadd, loadf64>,
                     fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64, sse_load_f64,
-                              X86Fnmadds1>;
+                              X86Fnmadd4s>;
   defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, X86Fnmsub, loadf64>,
                     fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64, sse_load_f64,
-                              X86Fnmsubs1>;
+                              X86Fnmsub4s>;
   // Packed Instructions
   defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, loadv2f64, loadv4f64>;
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index 371b5046ff3..2a6ed02fada 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -862,10 +862,14 @@ class PCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag>pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, T8PD,
-        VEX_4V, FMASC, Requires<[HasFMA, NoVLX]>;
+        VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>;
 class FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag>pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, T8PD,
+        VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>;
+class FMA3S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
+                list<dag>pattern, InstrItinClass itin = NoItinerary>
+      : I<o, F, outs, ins, asm, pattern, itin>, T8PD,
         VEX_4V, FMASC, Requires<[HasFMA, NoAVX512]>;
 // FMA4 Instruction Templates
@@ -877,6 +881,10 @@ class FMA4S<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag>pattern, InstrItinClass itin = NoItinerary>
       : Ii8Reg<o, F, outs, ins, asm, pattern, itin>, TAPD,
         VEX_4V, FMASC, Requires<[HasFMA4, NoAVX512]>;
+class FMA4S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
+                list<dag>pattern, InstrItinClass itin = NoItinerary>
+      : Ii8Reg<o, F, outs, ins, asm, pattern, itin>, TAPD,
+        VEX_4V, FMASC, Requires<[HasFMA4]>;
 // XOP 2, 3 and 4 Operand Instruction Template
 class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index e29c6b19bfd..b013d66a21d 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -506,6 +506,12 @@ def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound, [SDNPCommutat
 def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutative]>;
 def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>;
+// Scalar FMA4 intrinsics which zero the non-scalar bits.
+def X86Fmadd4s : SDNode<"X86ISD::FMADD4S", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fnmadd4s : SDNode<"X86ISD::FNMADD4S", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fmsub4s : SDNode<"X86ISD::FMSUB4S", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fnmsub4s : SDNode<"X86ISD::FNMSUB4S", SDTFPTernaryOp, [SDNPCommutative]>;
+
 // Scalar FMA intrinsics with passthru bits in operand 1.
 def X86Fmadds1 : SDNode<"X86ISD::FMADDS1", SDTFPTernaryOp>;
 def X86Fnmadds1 : SDNode<"X86ISD::FNMADDS1", SDTFPTernaryOp>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 926080347e7..97d3e6dfb44 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -850,6 +850,7 @@ def NoVLX_Or_NoVPCLMULQDQ :
 def HasVPCLMULQDQ : Predicate<"Subtarget->hasVPCLMULQDQ()">;
 def HasFMA : Predicate<"Subtarget->hasFMA()">;
 def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def NoFMA4 : Predicate<"!Subtarget->hasFMA4()">;
 def HasXOP : Predicate<"Subtarget->hasXOP()">;
 def HasTBM : Predicate<"Subtarget->hasTBM()">;
 def NoTBM : Predicate<"!Subtarget->hasTBM()">;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index d611f02a249..598994d07ad 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1593,6 +1593,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(fma_vfnmsub_ps_256, INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
   X86_INTRINSIC_DATA(fma_vfnmsub_sd, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0),
   X86_INTRINSIC_DATA(fma_vfnmsub_ss, INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0),
+  X86_INTRINSIC_DATA(fma4_vfmadd_sd, INTR_TYPE_3OP, X86ISD::FMADD4S, 0),
+  X86_INTRINSIC_DATA(fma4_vfmadd_ss, INTR_TYPE_3OP, X86ISD::FMADD4S, 0),
   X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
   X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
   X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index faf88aba177..50e1a742a0f 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -482,7 +482,7 @@ public:
   bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }
   // Prefer FMA4 to FMA - its better for commutation/memory folding and
   // has equal or better performance on all supported targets.
-  bool hasFMA() const { return HasFMA && !HasFMA4; }
+  bool hasFMA() const { return HasFMA; }
   bool hasFMA4() const { return HasFMA4; }
   bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
   bool hasXOP() const { return HasXOP; }
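For context, here is a minimal user-level sketch (not part of this change) of the scalar FMA4 intrinsics whose lowering the new X86ISD::FMADD4S family models. It assumes the usual _mm_macc_ss wrapper from <x86intrin.h>, available in Clang and GCC when building with -mfma4; the builtin-to-intrinsic plumbing lives in the compiler headers and in IntrinsicsX86.td, outside the llvm/lib/Target paths shown in this diff.

#include <x86intrin.h>

/* Hypothetical example; build with: clang -O2 -mfma4 -c fma4_scalar.c      */
/* vfmaddss (the FMA4 form) computes a*b+c in the low float lane. Per the   */
/* FMADD4S node added above, the upper three lanes of the result are zeroed */
/* rather than passed through from the first source operand.                */
__m128 fma4_scalar(__m128 a, __m128 b, __m128 c) {
  return _mm_macc_ss(a, b, c);
}

That zeroing behavior is what separates the new FMADD4S/FNMADD4S/FMSUB4S/FNMSUB4S nodes from the existing FMADDS1 family, which models the FMA3 scalar forms whose passthru bits come from operand 1.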


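A rough sketch of what the predicate rework preserves, assuming Clang with both features enabled: hasFMA() now reports FMA truthfully, and the NoFMA4 predicate on the FMA3/FMA3S templates keeps those patterns out of instruction selection whenever FMA4 is available, so the FMA4 encodings still win, matching the "Prefer FMA4 to FMA" comment kept in X86Subtarget.h.

/* Hypothetical check; build with: clang -O2 -mfma -mfma4 -S fma_pref.c     */
/* With both features on, the plain FMA3 scalar patterns are gated off by   */
/* NoFMA4, so this fused multiply-add is still expected to select the       */
/* 4-operand FMA4 encoding (vfmaddss) rather than an FMA3 form such as      */
/* vfmadd213ss.                                                             */
float fused(float a, float b, float c) {
  return __builtin_fmaf(a, b, c);
}

Note also that the new FMA3S_Int and FMA4S_Int templates carry looser Requires<> lists than their non-intrinsic counterparts, so the intrinsic-only instruction forms remain available in feature combinations where the plain patterns are gated off.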