diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp |  15
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp             | 135
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h               |   1
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td               |  30
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td        |   9
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td                  |  12
6 files changed, 153 insertions(+), 49 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 4e907fd19e7..d9f95df57be 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -502,6 +502,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) { switch (Op.getOpcode()) { case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::STRICT_SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: // "Promote" the operation by extending the operand. return PromoteINT_TO_FP(Op); case ISD::FP_TO_UINT: @@ -550,7 +552,8 @@ SDValue VectorLegalizer::Promote(SDValue Op) { SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { // INT_TO_FP operations may require the input operand be promoted even // when the type is otherwise legal. - MVT VT = Op.getOperand(0).getSimpleValueType(); + bool IsStrict = Op->isStrictFPOpcode(); + MVT VT = Op.getOperand(IsStrict ? 1 : 0).getSimpleValueType(); MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && "Vectors have different number of elements!"); @@ -558,8 +561,10 @@ SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { SDLoc dl(Op); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); - unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : - ISD::SIGN_EXTEND; + unsigned Opc = (Op.getOpcode() == ISD::UINT_TO_FP || + Op.getOpcode() == ISD::STRICT_UINT_TO_FP) + ? 
ISD::ZERO_EXTEND + : ISD::SIGN_EXTEND; for (unsigned j = 0; j != Op.getNumOperands(); ++j) { if (Op.getOperand(j).getValueType().isVector()) Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j)); @@ -567,6 +572,10 @@ SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { Operands[j] = Op.getOperand(j); } + if (IsStrict) + return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other}, + Operands); + return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c383d9252b3..66a117452f1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -861,6 +861,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::SELECT, MVT::v4f32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom); setOperationAction(ISD::LOAD, MVT::v2f32, Custom); setOperationAction(ISD::STORE, MVT::v2f32, Custom); @@ -1170,6 +1171,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal); @@ -1296,6 +1298,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // The custom lowering for UINT_TO_FP for v8i32 becomes interesting // when we have a 256bit-wide blend with immediate. 
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom); // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { @@ -1459,6 +1462,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal); @@ -1557,9 +1562,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (Subtarget.hasDQI()) { setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); - setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal); @@ -1632,6 +1638,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); for (auto VT : { MVT::v2i64, MVT::v4i64 }) { setOperationAction(ISD::SMAX, VT, Legal); @@ -1658,6 +1666,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, for (auto VT : { MVT::v2i64, MVT::v4i64 }) { 
setOperationAction(ISD::SINT_TO_FP, VT, Legal); setOperationAction(ISD::UINT_TO_FP, VT, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal); setOperationAction(ISD::FP_TO_SINT, VT, Legal); setOperationAction(ISD::FP_TO_UINT, VT, Legal); setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); @@ -1966,6 +1976,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); + setTargetDAGCombine(ISD::STRICT_SINT_TO_FP); + setTargetDAGCombine(ISD::STRICT_UINT_TO_FP); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::XOR); @@ -18560,6 +18572,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, bool IsStrict = Op->isStrictFPOpcode(); unsigned OpNo = IsStrict ? 1 : 0; SDValue Src = Op.getOperand(OpNo); + SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode(); MVT SrcVT = Src.getSimpleValueType(); MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); @@ -18568,8 +18581,14 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, return Extract; if (SrcVT.isVector()) { - if (SrcVT == MVT::v2i32 && VT == MVT::v2f64 && !IsStrict) { - // FIXME: A strict version of CVTSI2P is needed. + if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) { + // Note: Since v2f64 is a legal type. We don't need to zero extend the + // source for strict FP. 
+ if (IsStrict) + return DAG.getNode( + X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other}, + {Chain, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, + DAG.getUNDEF(SrcVT))}); return DAG.getNode(X86ISD::CVTSI2P, dl, VT, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, DAG.getUNDEF(SrcVT))); @@ -18597,7 +18616,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src); if (IsStrict) return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, - {Op.getOperand(0), Ext}); + {Chain, Ext}); return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext); } @@ -18617,7 +18636,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, auto PtrVT = getPointerTy(MF.getDataLayout()); int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); - SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode(); Chain = DAG.getStore( Chain, dl, ValueToStore, StackSlot, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI)); @@ -18844,19 +18862,21 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG, if (Op.getSimpleValueType() != MVT::v2f64) return SDValue(); - // FIXME: Need to fix the lack of StrictFP support here. - if (Op.getNode()->isStrictFPOpcode()) - return SDValue(); + bool IsStrict = Op->isStrictFPOpcode(); - SDValue N0 = Op.getOperand(0); + SDValue N0 = Op.getOperand(IsStrict ? 1 : 0); assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type"); // Legalize to v4i32 type. N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0, DAG.getUNDEF(MVT::v2i32)); - if (Subtarget.hasAVX512()) + if (Subtarget.hasAVX512()) { + if (IsStrict) + return DAG.getNode(X86ISD::STRICT_CVTUI2P, DL, {MVT::v2f64, MVT::Other}, + {Op.getOperand(0), N0}); return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0); + } // Same implementation as VectorLegalizer::ExpandUINT_TO_FLOAT, // but using v2i32 to v2f64 with X86ISD::CVTSI2P. 
@@ -18870,6 +18890,21 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG, SDValue HI = DAG.getNode(ISD::SRL, DL, MVT::v4i32, N0, HalfWord); SDValue LO = DAG.getNode(ISD::AND, DL, MVT::v4i32, N0, HalfWordMask); + if (IsStrict) { + SDValue fHI = DAG.getNode(X86ISD::STRICT_CVTSI2P, DL, + {MVT::v2f64, MVT::Other}, {Op.getOperand(0), HI}); + fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {MVT::v2f64, MVT::Other}, + {fHI.getValue(1), fHI, TWOHW}); + SDValue fLO = DAG.getNode(X86ISD::STRICT_CVTSI2P, DL, + {MVT::v2f64, MVT::Other}, {Op.getOperand(0), LO}); + SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + fHI.getValue(1), fLO.getValue(1)); + + // Add the two halves + return DAG.getNode(ISD::STRICT_FADD, DL, {MVT::v2f64, MVT::Other}, + {Chain, fHI, fLO}); + } + SDValue fHI = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, HI); fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW); SDValue fLO = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, LO); @@ -18902,7 +18937,8 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, return SDValue(); SDLoc DL(Op); - SDValue V = Op->getOperand(0); + bool IsStrict = Op->isStrictFPOpcode(); + SDValue V = Op->getOperand(IsStrict ? 1 : 0); MVT VecIntVT = V.getSimpleValueType(); bool Is128 = VecIntVT == MVT::v4i32; MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; @@ -18965,10 +19001,18 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High); // TODO: Are there any fast-math-flags to propagate here? 
+ // (float4) lo; + SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low); + // return (float4) lo + fhi; + if (IsStrict) { + SDValue FHigh = DAG.getNode(ISD::STRICT_FADD, DL, {VecFloatVT, MVT::Other}, + {Op.getOperand(0), HighBitcast, VecCstFAdd}); + return DAG.getNode(ISD::STRICT_FADD, DL, {VecFloatVT, MVT::Other}, + {FHigh.getValue(1), LowBitcast, FHigh}); + } + SDValue FHigh = DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd); - // return (float4) lo + fhi; - SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low); return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); } @@ -19108,6 +19152,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, if (IsStrict) { SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other}, {Chain, Fild, Fudge}); + // STRICT_FP_ROUND can't handle equal types. + if (DstVT == MVT::f80) + return Add; return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other}, {Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)}); } @@ -29402,6 +29449,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CVTTS2UI_SAE: return "X86ISD::CVTTS2UI_SAE"; case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P"; case X86ISD::CVTUI2P: return "X86ISD::CVTUI2P"; + case X86ISD::STRICT_CVTSI2P: return "X86ISD::STRICT_CVTSI2P"; + case X86ISD::STRICT_CVTUI2P: return "X86ISD::STRICT_CVTUI2P"; case X86ISD::MCVTSI2P: return "X86ISD::MCVTSI2P"; case X86ISD::MCVTUI2P: return "X86ISD::MCVTUI2P"; case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS"; @@ -34804,6 +34853,8 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, break; case X86ISD::STRICT_CVTTP2SI: case X86ISD::STRICT_CVTTP2UI: + case X86ISD::STRICT_CVTSI2P: + case X86ISD::STRICT_CVTUI2P: if (In.getOperand(1).getValueType() == MVT::v2f64 || In.getOperand(1).getValueType() == MVT::v2i64) return N->getOperand(0); @@ -43627,16 +43678,18 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N, // unary operation isn't a bitwise AND, or 
if the sizes of the operations // aren't the same. EVT VT = N->getValueType(0); - if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND || - N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC || - VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits()) + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); + if (!VT.isVector() || Op0->getOpcode() != ISD::AND || + Op0->getOperand(0)->getOpcode() != ISD::SETCC || + VT.getSizeInBits() != Op0.getValueSizeInBits()) return SDValue(); // Now check that the other operand of the AND is a constant. We could // make the transformation for non-constant splats as well, but it's unclear // that would be a benefit as it would not eliminate any operations, just // perform one more step in scalar code before moving to the vector unit. - if (auto *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(0).getOperand(1))) { + if (auto *BV = dyn_cast<BuildVectorSDNode>(Op0.getOperand(1))) { // Bail out if the vector isn't a constant. if (!BV->isConstant()) return SDValue(); @@ -43646,12 +43699,19 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N, EVT IntVT = BV->getValueType(0); // Create a new constant of the appropriate type for the transformed // DAG. - SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0)); + SDValue SourceConst; + if (IsStrict) + SourceConst = DAG.getNode(N->getOpcode(), DL, {VT, MVT::Other}, + {N->getOperand(0), SDValue(BV, 0)}); + else + SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0)); // The AND node needs bitcasts to/from an integer vector type around it. 
SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst); - SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, - N->getOperand(0)->getOperand(0), MaskConst); + SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0), + MaskConst); SDValue Res = DAG.getBitcast(VT, NewAnd); + if (IsStrict) + return DAG.getMergeValues({Res, SourceConst.getValue(1)}, DL); return Res; } @@ -43695,7 +43755,8 @@ static SDValue combineToFPTruncExtElt(SDNode *N, SelectionDAG &DAG) { static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - SDValue Op0 = N->getOperand(0); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); EVT VT = N->getValueType(0); EVT InVT = Op0.getValueType(); @@ -43709,14 +43770,21 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0); // UINT_TO_FP isn't legal without AVX512 so use SINT_TO_FP. + if (IsStrict) + return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, + {N->getOperand(0), P}); return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); } // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform // the optimization here. - if (DAG.SignBitIsZero(Op0)) + if (DAG.SignBitIsZero(Op0)) { + if (IsStrict) + return DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(N), {VT, MVT::Other}, + {N->getOperand(0), Op0}); return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0); + } return SDValue(); } @@ -43726,11 +43794,12 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { // First try to optimize away the conversion entirely when it's // conditionally from a constant. Vectors only. + bool IsStrict = N->isStrictFPOpcode(); if (SDValue Res = combineVectorCompareAndMaskUnaryOp(N, DAG)) return Res; // Now move on to more general possibilities. 
- SDValue Op0 = N->getOperand(0); + SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); EVT VT = N->getValueType(0); EVT InVT = Op0.getValueType(); @@ -43742,6 +43811,9 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, InVT.getVectorNumElements()); SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0); + if (IsStrict) + return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, + {N->getOperand(0), P}); return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); } @@ -43759,6 +43831,9 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, SDLoc dl(N); if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) { SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0); + if (IsStrict) + return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, + {N->getOperand(0), Trunc}); return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc); } // If we're after legalize and the type is v2i32 we need to shuffle and @@ -43767,6 +43842,9 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0); SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast, { 0, 2, -1, -1 }); + if (IsStrict) + return DAG.getNode(X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other}, + {N->getOperand(0), Shuf}); return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf); } } @@ -43797,6 +43875,9 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, } } + if (IsStrict) + return SDValue(); + if (SDValue V = combineToFPTruncExtElt(N, DAG)) return V; @@ -45420,8 +45501,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget); case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget); case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget); - case ISD::SINT_TO_FP: return combineSIntToFP(N, DAG, DCI, Subtarget); - case ISD::UINT_TO_FP: return combineUIntToFP(N, DAG, Subtarget); + case ISD::SINT_TO_FP: + 
case ISD::STRICT_SINT_TO_FP: + return combineSIntToFP(N, DAG, DCI, Subtarget); + case ISD::UINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + return combineUIntToFP(N, DAG, Subtarget); case ISD::FADD: case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); case ISD::FNEG: return combineFneg(N, DAG, Subtarget); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 2b78dc0f5ac..18af57156a3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -512,6 +512,7 @@ namespace llvm { // Vector signed/unsigned integer to float/double. CVTSI2P, CVTUI2P, + STRICT_CVTSI2P, STRICT_CVTUI2P, // Masked versions of above. Used for v2f64->v4f32. // SRC, PASSTHRU, MASK diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index dfb5e34fa79..c3da6d2a7c5 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8108,7 +8108,7 @@ multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, VK4WM:$mask, i64mem:$src), 0, "att">; } -defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, X86VSintToFP, +defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", any_sint_to_fp, X86any_VSintToFP, SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", any_sint_to_fp, @@ -8132,7 +8132,7 @@ defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui, PS, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp, - X86VUintToFP, SchedWriteCvtDQ2PD>, XS, + X86any_VUintToFP, SchedWriteCvtDQ2PD>, XS, EVEX_CD8<32, CD8VH>; defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", any_uint_to_fp, @@ -8398,32 +8398,32 @@ def : Pat<(v4f64 (any_uint_to_fp (v4i32 VR128X:$src1))), (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_ymm)>; -def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))), +def : Pat<(v2f64 
(X86any_VUintToFP (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)))), sub_xmm)>; } let Predicates = [HasVLX] in { - def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), + def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (VCVTDQ2PDZ128rm addr:$src)>; def : Pat<(v2f64 (vselect VK2WM:$mask, - (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), + (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), VR128X:$src0)), (VCVTDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; def : Pat<(v2f64 (vselect VK2WM:$mask, - (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), + (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), v2f64x_info.ImmAllZerosV)), (VCVTDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v2f64 (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), + def : Pat<(v2f64 (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (VCVTUDQ2PDZ128rm addr:$src)>; def : Pat<(v2f64 (vselect VK2WM:$mask, - (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), + (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), VR128X:$src0)), (VCVTUDQ2PDZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>; def : Pat<(v2f64 (vselect VK2WM:$mask, - (X86VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), + (X86any_VUintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src)))), v2f64x_info.ImmAllZerosV)), (VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>; } @@ -8431,7 +8431,7 @@ let Predicates = [HasVLX] in { let Predicates = [HasDQI, HasVLX] in { // Special patterns to allow use of X86VMSintToFP for masking. Instruction // patterns have been disabled with null_frag. 
- def : Pat<(v4f32 (X86VSintToFP (v2i64 VR128X:$src))), + def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))), (VCVTQQ2PSZ128rr VR128X:$src)>; def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0), VK2WM:$mask), @@ -8440,7 +8440,7 @@ let Predicates = [HasDQI, HasVLX] in { VK2WM:$mask), (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; - def : Pat<(v4f32 (X86VSintToFP (loadv2i64 addr:$src))), + def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))), (VCVTQQ2PSZ128rm addr:$src)>; def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0), VK2WM:$mask), @@ -8449,7 +8449,7 @@ let Predicates = [HasDQI, HasVLX] in { VK2WM:$mask), (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4f32 (X86VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), + def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), (VCVTQQ2PSZ128rmb addr:$src)>; def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)), (v4f32 VR128X:$src0), VK2WM:$mask), @@ -8460,7 +8460,7 @@ let Predicates = [HasDQI, HasVLX] in { // Special patterns to allow use of X86VMUintToFP for masking. Instruction // patterns have been disabled with null_frag. 
- def : Pat<(v4f32 (X86VUintToFP (v2i64 VR128X:$src))), + def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))), (VCVTUQQ2PSZ128rr VR128X:$src)>; def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0), VK2WM:$mask), @@ -8469,7 +8469,7 @@ let Predicates = [HasDQI, HasVLX] in { VK2WM:$mask), (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>; - def : Pat<(v4f32 (X86VUintToFP (loadv2i64 addr:$src))), + def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))), (VCVTUQQ2PSZ128rm addr:$src)>; def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0), VK2WM:$mask), @@ -8478,7 +8478,7 @@ let Predicates = [HasDQI, HasVLX] in { VK2WM:$mask), (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>; - def : Pat<(v4f32 (X86VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), + def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))), (VCVTUQQ2PSZ128rmb addr:$src)>; def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)), (v4f32 VR128X:$src0), VK2WM:$mask), diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 35fc080ed94..a04c493675a 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -645,6 +645,15 @@ def X86any_cvttp2ui : PatFrags<(ops node:$src), def X86VSintToFP : SDNode<"X86ISD::CVTSI2P", SDTVintToFP>; def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>; +def X86strict_VSintToFP : SDNode<"X86ISD::STRICT_CVTSI2P", SDTVintToFP, [SDNPHasChain]>; +def X86strict_VUintToFP : SDNode<"X86ISD::STRICT_CVTUI2P", SDTVintToFP, [SDNPHasChain]>; +def X86any_VSintToFP : PatFrags<(ops node:$src), + [(X86strict_VSintToFP node:$src), + (X86VSintToFP node:$src)]>; +def X86any_VUintToFP : PatFrags<(ops node:$src), + [(X86strict_VUintToFP node:$src), + (X86VUintToFP node:$src)]>; + // cvt int-to-fp staff def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td 
b/llvm/lib/Target/X86/X86InstrSSE.td index 196cf47450d..c218acc6e36 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1650,7 +1650,7 @@ let hasSideEffects = 0, mayLoad = 1 in def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (X86VSintToFP + (v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))))]>, @@ -1658,7 +1658,7 @@ def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>, + (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>, VEX, Sched<[WriteCvtI2PD]>, VEX_WIG; def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", @@ -1677,7 +1677,7 @@ let hasSideEffects = 0, mayLoad = 1 in def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (X86VSintToFP + (v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))))]>, @@ -1685,18 +1685,18 @@ def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>, + (v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>, Sched<[WriteCvtI2PD]>; // AVX register conversion intrinsics let Predicates = [HasAVX, NoVLX] in { - def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), + def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (VCVTDQ2PDrm addr:$src)>; } // Predicates = [HasAVX, NoVLX] // SSE2 register conversion intrinsics let Predicates = [UseSSE2] in { - def : 
Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), + def : Pat<(v2f64 (X86any_VSintToFP (bc_v4i32 (v2i64 (X86vzload64 addr:$src))))), (CVTDQ2PDrm addr:$src)>; } // Predicates = [UseSSE2] |