Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r--  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp |  8
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 72
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td   | 60
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td      | 40
4 files changed, 86 insertions, 94 deletions
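
A note on the mechanics before the diff body: the pattern changes below all replace fpextend/fpround/fsqrt with their any_* wrappers. These wrappers are PatFrags from llvm/include/llvm/Target/TargetSelectionDAG.td that match either the constrained (strict) SelectionDAG node or the plain one, so a single instruction pattern covers both and the STRICT_FP_ROUND mutation workaround in X86ISelDAGToDAG.cpp can be deleted. A sketch of their shape, paraphrased from TargetSelectionDAG.td rather than copied from this commit (the in-tree definitions may differ in detail):

  // Each wrapper tries the strict node first, then the ordinary one, so a
  // pattern written against any_fsqrt selects for both ISD::STRICT_FSQRT
  // and ISD::FSQRT.
  def any_fsqrt : PatFrags<(ops node:$src),
                           [(strict_fsqrt node:$src),
                            (fsqrt node:$src)]>;
  def any_fpextend : PatFrags<(ops node:$src),
                              [(strict_fpextend node:$src),
                               (fpextend node:$src)]>;
  def any_fpround : PatFrags<(ops node:$src),
                             [(strict_fpround node:$src),
                              (fpround node:$src)]>;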
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index b955f63296b..0c7aaada4be 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5219,14 +5219,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     SelectCode(Res.getNode());
     return;
   }
-  case ISD::STRICT_FP_ROUND: {
-    // X87 instructions has enabled this strict fp operation.
-    bool UsingFp80 = Node->getSimpleValueType(0) == MVT::f80 ||
-                     Node->getOperand(1).getSimpleValueType() == MVT::f80;
-    if (UsingFp80 || (!Subtarget->hasSSE1() && Subtarget->hasX87()))
-      break;
-    LLVM_FALLTHROUGH;
-  }
   case ISD::STRICT_FP_TO_SINT:
   case ISD::STRICT_FP_TO_UINT:
     // FIXME: Remove when we have isel patterns for strict versions of these
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3697b5328e9..7733ad66162 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -591,13 +591,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FSIN   , VT, Expand);
       setOperationAction(ISD::FCOS   , VT, Expand);
       setOperationAction(ISD::FSINCOS, VT, Expand);
-
-      // Handle constrained floating-point operations of scalar.
-      setOperationAction(ISD::STRICT_FSQRT    , VT, Legal);
-      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
-      // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
-      // as Custom.
-      setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
     }
   }
 
@@ -622,14 +615,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       addLegalFPImmediate(APFloat(+0.0)); // xorpd
     }
     // Handle constrained floating-point operations of scalar.
-    setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
-    setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
-    setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
-    setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
-    setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
-    setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
-    setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
-    setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FADD,      MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FADD,      MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FSUB,      MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FSUB,      MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FMUL,      MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FMUL,      MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FDIV,      MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FDIV,      MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND,  MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND,  MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FSQRT,     MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FSQRT,     MVT::f64, Legal);
 
     // We don't support FMA.
     setOperationAction(ISD::FMA, MVT::f64, Expand);
@@ -857,17 +855,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
     setOperationAction(ISD::STORE, MVT::v2f32, Custom);
 
-    // FIXME: Currently mutated to non-strict form in X86ISelDAGToDAG::Select,
-    // but its sufficient to pretend they're Legal since they will be someday.
-    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
     setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
-    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
     setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
-    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
     setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
-    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
     setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
-    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -1042,6 +1034,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     // With AVX512, expanding (and promoting the shifts) is better.
     if (!Subtarget.hasAVX512())
       setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
+
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
@@ -1157,9 +1155,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
     setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
 
-    // FIXME: Currently mutated to non-strict form in X86ISelDAGToDAG::Select,
-    // but its sufficient to pretend they're Legal since they will be someday.
-    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
     setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
     setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
     setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
@@ -1168,6 +1164,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
     setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
     setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
 
     if (!Subtarget.hasAVX512())
       setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
@@ -1430,17 +1429,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
     setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
 
-    // FIXME: Currently mutated to non-strict form in X86ISelDAGToDAG::Select,
-    // but its sufficient to pretend their Legal since they will be someday.
-    setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Legal);
-    setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
-    setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
-    setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
-    setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
-    setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
-    setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
-    setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
-    setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FADD,      MVT::v16f32, Legal);
+    setOperationAction(ISD::STRICT_FADD,      MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FSUB,      MVT::v16f32, Legal);
+    setOperationAction(ISD::STRICT_FSUB,      MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FMUL,      MVT::v16f32, Legal);
+    setOperationAction(ISD::STRICT_FMUL,      MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FDIV,      MVT::v16f32, Legal);
+    setOperationAction(ISD::STRICT_FDIV,      MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FSQRT,     MVT::v16f32, Legal);
+    setOperationAction(ISD::STRICT_FSQRT,     MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND,  MVT::v8f32, Legal);
 
     setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
     setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 3f782f7586b..ab94950a635 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7464,28 +7464,28 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
                                             X86fpexts, X86fpextsSAE,
                                             WriteCvtSS2SD, f32x_info,
                                             f64x_info>;
 
-def : Pat<(f64 (fpextend FR32X:$src)),
+def : Pat<(f64 (any_fpextend FR32X:$src)),
          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
          Requires<[HasAVX512]>;
 
-def : Pat<(f64 (fpextend (loadf32 addr:$src))),
+def : Pat<(f64 (any_fpextend (loadf32 addr:$src))),
          (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX512, OptForSize]>;
 
-def : Pat<(f32 (fpround FR64X:$src)),
+def : Pat<(f32 (any_fpround FR64X:$src)),
          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
          Requires<[HasAVX512]>;
 
 def : Pat<(v4f32 (X86Movss
                   (v4f32 VR128X:$dst),
                   (v4f32 (scalar_to_vector
-                          (f32 (fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
+                          (f32 (any_fpround (f64 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSD2SSZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
 
 def : Pat<(v2f64 (X86Movsd
                   (v2f64 VR128X:$dst),
                   (v2f64 (scalar_to_vector
-                          (f64 (fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
+                          (f64 (any_fpextend (f32 (extractelt VR128X:$src, (iPTR 0))))))))),
          (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
          Requires<[HasAVX512]>;
@@ -7583,14 +7583,14 @@ multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
   let Predicates = [HasAVX512] in {
     defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
-                                  fpextend, sched.ZMM>,
+                                  any_fpextend, sched.ZMM>,
             avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                X86vfpextSAE, sched.ZMM>, EVEX_V512;
   }
   let Predicates = [HasVLX] in {
     defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                                      X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
-    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
+    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, any_fpextend,
                                      sched.YMM>, EVEX_V256;
   }
 }
@@ -7657,63 +7657,63 @@ defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
                                 PS, EVEX_CD8<32, CD8VH>;
 
 let Predicates = [HasAVX512] in {
-  def : Pat<(v8f32 (fpround (v8f64 VR512:$src))),
+  def : Pat<(v8f32 (any_fpround (v8f64 VR512:$src))),
            (VCVTPD2PSZrr VR512:$src)>;
-  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
+  def : Pat<(vselect VK8WM:$mask, (v8f32 (any_fpround (v8f64 VR512:$src))),
                     VR256X:$src0),
            (VCVTPD2PSZrrk VR256X:$src0, VK8WM:$mask, VR512:$src)>;
-  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (v8f64 VR512:$src))),
+  def : Pat<(vselect VK8WM:$mask, (v8f32 (any_fpround (v8f64 VR512:$src))),
                     v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrrkz VK8WM:$mask, VR512:$src)>;
 
-  def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
+  def : Pat<(v8f32 (any_fpround (loadv8f64 addr:$src))),
            (VCVTPD2PSZrm addr:$src)>;
-  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
+  def : Pat<(vselect VK8WM:$mask, (v8f32 (any_fpround (loadv8f64 addr:$src))),
                    VR256X:$src0),
            (VCVTPD2PSZrmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
-  def : Pat<(vselect VK8WM:$mask, (v8f32 (fpround (loadv8f64 addr:$src))),
+  def : Pat<(vselect VK8WM:$mask, (v8f32 (any_fpround (loadv8f64 addr:$src))),
                    v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrmkz VK8WM:$mask, addr:$src)>;
 
-  def : Pat<(v8f32 (fpround (v8f64 (X86VBroadcastld64 addr:$src)))),
+  def : Pat<(v8f32 (any_fpround (v8f64 (X86VBroadcastld64 addr:$src)))),
           (VCVTPD2PSZrmb addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
-                    (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
+                    (any_fpround (v8f64 (X86VBroadcastld64 addr:$src))),
                     (v8f32 VR256X:$src0)),
            (VCVTPD2PSZrmbk VR256X:$src0, VK8WM:$mask, addr:$src)>;
  def : Pat<(vselect VK8WM:$mask,
-                    (fpround (v8f64 (X86VBroadcastld64 addr:$src))),
+                    (any_fpround (v8f64 (X86VBroadcastld64 addr:$src))),
                     v8f32x_info.ImmAllZerosV),
            (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
 }
 
 let Predicates = [HasVLX] in {
-  def : Pat<(v4f32 (fpround (v4f64 VR256X:$src))),
+  def : Pat<(v4f32 (any_fpround (v4f64 VR256X:$src))),
           (VCVTPD2PSZ256rr VR256X:$src)>;
-  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
+  def : Pat<(vselect VK4WM:$mask, (v4f32 (any_fpround (v4f64 VR256X:$src))),
                    VR128X:$src0),
           (VCVTPD2PSZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
-  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (v4f64 VR256X:$src))),
+  def : Pat<(vselect VK4WM:$mask, (v4f32 (any_fpround (v4f64 VR256X:$src))),
                    v4f32x_info.ImmAllZerosV),
           (VCVTPD2PSZ256rrkz VK4WM:$mask, VR256X:$src)>;
 
-  def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
+  def : Pat<(v4f32 (any_fpround (loadv4f64 addr:$src))),
          (VCVTPD2PSZ256rm addr:$src)>;
-  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
+  def : Pat<(vselect VK4WM:$mask, (v4f32 (any_fpround (loadv4f64 addr:$src))),
                    VR128X:$src0),
          (VCVTPD2PSZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
-  def : Pat<(vselect VK4WM:$mask, (v4f32 (fpround (loadv4f64 addr:$src))),
+  def : Pat<(vselect VK4WM:$mask, (v4f32 (any_fpround (loadv4f64 addr:$src))),
                    v4f32x_info.ImmAllZerosV),
          (VCVTPD2PSZ256rmkz VK4WM:$mask, addr:$src)>;
 
-  def : Pat<(v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+  def : Pat<(v4f32 (any_fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
          (VCVTPD2PSZ256rmb addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
-                    (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+                    (v4f32 (any_fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
                     VR128X:$src0),
          (VCVTPD2PSZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
  def : Pat<(vselect VK4WM:$mask,
-                    (v4f32 (fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+                    (v4f32 (any_fpround (v4f64 (X86VBroadcastld64 addr:$src)))),
                     v4f32x_info.ImmAllZerosV),
          (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
@@ -8963,17 +8963,17 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
  let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
  defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _.RC:$src), OpcodeStr, "$src", "$src",
-                         (_.VT (fsqrt _.RC:$src))>, EVEX,
+                         (_.VT (any_fsqrt _.RC:$src))>, EVEX,
                          Sched<[sched]>;
  defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                          (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
-                         (fsqrt (_.VT
+                         (any_fsqrt (_.VT
                                  (bitconvert (_.LdFrag addr:$src))))>, EVEX,
                          Sched<[sched.Folded, sched.ReadAfterFold]>;
  defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
                           (ins _.ScalarMemOp:$src), OpcodeStr,
                           "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr,
-                          (fsqrt (_.VT
+                          (any_fsqrt (_.VT
                                   (_.BroadcastLdFrag addr:$src)))>,
                           EVEX, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
  }
@@ -9054,13 +9054,13 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
  }
 
  let Predicates = [HasAVX512] in {
-   def : Pat<(_.EltVT (fsqrt _.FRC:$src)),
+   def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
             (!cast<Instruction>(Name#Zr) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
  }
 
  let Predicates = [HasAVX512, OptForSize] in {
-   def : Pat<(_.EltVT (fsqrt (load addr:$src))),
+   def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
             (!cast<Instruction>(Name#Zm) (_.EltVT (IMPLICIT_DEF)), addr:$src)>;
  }
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index adc616e86c2..f97d90076c1 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1219,18 +1219,18 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                     Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC;
 }
 
-def : Pat<(f32 (fpround FR64:$src)),
+def : Pat<(f32 (any_fpround FR64:$src)),
          (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
          Requires<[UseAVX]>;
 
 let isCodeGenOnly = 1 in {
 def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
-                     [(set FR32:$dst, (fpround FR64:$src))]>,
+                     [(set FR32:$dst, (any_fpround FR64:$src))]>,
                      Sched<[WriteCvtSD2SS]>, SIMD_EXC;
 def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                    "cvtsd2ss\t{$src, $dst|$dst, $src}",
-                   [(set FR32:$dst, (fpround (loadf64 addr:$src)))]>,
+                   [(set FR32:$dst, (any_fpround (loadf64 addr:$src)))]>,
                    XD, Requires<[UseSSE2, OptForSize]>,
                    Sched<[WriteCvtSD2SS.Folded]>, SIMD_EXC;
 }
@@ -1284,19 +1284,19 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                     Requires<[UseAVX, OptForSize]>, SIMD_EXC;
 } // isCodeGenOnly = 1, hasSideEffects = 0
 
-def : Pat<(f64 (fpextend FR32:$src)),
+def : Pat<(f64 (any_fpextend FR32:$src)),
          (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>,
          Requires<[UseAVX]>;
-def : Pat<(fpextend (loadf32 addr:$src)),
+def : Pat<(any_fpextend (loadf32 addr:$src)),
          (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[UseAVX, OptForSize]>;
 
 let isCodeGenOnly = 1 in {
 def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
-                   [(set FR64:$dst, (fpextend FR32:$src))]>,
+                   [(set FR64:$dst, (any_fpextend FR32:$src))]>,
                    XS, Requires<[UseSSE2]>, Sched<[WriteCvtSS2SD]>, SIMD_EXC;
 def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
-                   [(set FR64:$dst, (fpextend (loadf32 addr:$src)))]>,
+                   [(set FR64:$dst, (any_fpextend (loadf32 addr:$src)))]>,
                    XS, Requires<[UseSSE2, OptForSize]>,
                    Sched<[WriteCvtSS2SD.Folded]>, SIMD_EXC;
 } // isCodeGenOnly = 1
@@ -1335,13 +1335,13 @@ let Predicates = [UseAVX] in {
  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128:$dst),
                     (v4f32 (scalar_to_vector
-                            (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
+                            (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
            (VCVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
 
  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128:$dst),
                     (v2f64 (scalar_to_vector
-                            (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
+                            (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
            (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
 
  def : Pat<(v4f32 (X86Movss
@@ -1389,13 +1389,13 @@ let Predicates = [UseSSE2] in {
  def : Pat<(v4f32 (X86Movss
                     (v4f32 VR128:$dst),
                     (v4f32 (scalar_to_vector
-                            (f32 (fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
+                            (f32 (any_fpround (f64 (extractelt VR128:$src, (iPTR 0))))))))),
            (CVTSD2SSrr_Int VR128:$dst, VR128:$src)>;
 
  def : Pat<(v2f64 (X86Movsd
                     (v2f64 VR128:$dst),
                     (v2f64 (scalar_to_vector
-                            (f64 (fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
+                            (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
            (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
 
  def : Pat<(v2f64 (X86Movsd
@@ -1625,7 +1625,7 @@ def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                     PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
 def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                      "vcvtps2pd\t{$src, $dst|$dst, $src}",
-                     [(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))]>,
+                     [(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>,
                      PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
 def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
                      "vcvtps2pd\t{$src, $dst|$dst, $src}",
@@ -1740,9 +1740,9 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;
 
 let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(v4f32 (fpround (v4f64 VR256:$src))),
+  def : Pat<(v4f32 (any_fpround (v4f64 VR256:$src))),
           (VCVTPD2PSYrr VR256:$src)>;
-  def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))),
+  def : Pat<(v4f32 (any_fpround (loadv4f64 addr:$src))),
           (VCVTPD2PSYrm addr:$src)>;
 }
@@ -3007,10 +3007,10 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
 }
 
 // Square root.
-defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt, UseAVX>,
-            sse1_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
-            sse2_fp_unop_s<0x51, "sqrt", fsqrt, SchedWriteFSqrt64, UseAVX>,
-            sse2_fp_unop_p<0x51, "sqrt", fsqrt, SchedWriteFSqrt64>, SIMD_EXC;
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>,
+            sse1_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, [HasAVX, NoVLX]>,
+            sse2_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64, UseAVX>,
+            sse2_fp_unop_p<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt64>, SIMD_EXC;
 
 // Reciprocal approximations. Note that these typically require refinement
 // in order to obtain suitable precision.
@@ -3039,8 +3039,8 @@ multiclass scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix, SDNode Mo
  }
 }
 
-defm : scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
-defm : scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
+defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
+defm : scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
 
 multiclass scalar_unary_math_intr_patterns<Intrinsic Intr, string OpcPrefix,
                                            SDNode Move, ValueType VT,
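
Net effect, restated: X86ISelDAGToDAG::Select previously rewrote ISD::STRICT_FP_ROUND to its non-strict form before selection (the removed FIXME comments above note the same stopgap for the other operations); with the any_* patterns the strict nodes survive as Legal and are matched directly. For illustration, one rule repeated from the diff above (not new code):

  def : Pat<(f32 (any_fpround FR64:$src)),
            (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
            Requires<[UseAVX]>;

Because any_fpround expands to both strict_fpround and fpround, TableGen emits matchers for both opcodes from this single rule. The strict node carries a chain operand, and the selected instructions are annotated with SIMD_EXC / mayRaiseFPException and a use of MXCSR (visible throughout the hunks above), so the converts stay ordered relative to other code that reads or writes the FP environment.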