diff options
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 114 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 7 |
2 files changed, 101 insertions, 20 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index fb1d30b28e1..b50376bea53 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -20502,6 +20502,25 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Src1, Src2, Src3), Mask, PassThru, Subtarget, DAG); } + case INTR_TYPE_3OP_RM: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + + // We specify 2 possible opcodes for intrinsics with rounding modes. + // First, we check if the intrinsic may have non-default rounding mode, + // (IntrData->Opc1 != 0), then we check the rounding mode operand. + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; + if (IntrWithRoundingModeOpcode != 0) { + SDValue Rnd = Op.getOperand(4); + if (!isRoundModeCurDirection(Rnd)) { + return DAG.getNode(IntrWithRoundingModeOpcode, + dl, Op.getValueType(), + Src1, Src2, Src3, Rnd); + } + } + return DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3); + } case VPERM_2OP : { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); @@ -30389,6 +30408,35 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, return SDValue(); } +/// Checks if the shuffle mask takes subsequent elements +/// alternately from two vectors. +/// For example <0, 5, 2, 7> or <8, 1, 10, 3, 12, 5, 14, 7> are both correct. +static bool isAddSubOrSubAddMask(ArrayRef<int> Mask, int ParitySrc[2]) { + + unsigned Size = Mask.size(); + for (unsigned i = 0; i != Size; ++i) { + int M = Mask[i]; + if (M < 0) + continue; + + // Make sure we are using the matching element from the input. + if ((M % Size) != i) + return false; + + // Make sure we use the same input for all elements of the same parity. + int Src = M / Size; + if (ParitySrc[i % 2] >= 0 && ParitySrc[i % 2] != Src) + return false; + ParitySrc[i % 2] = Src; + } + + // Make sure each input is used. 
+ if (ParitySrc[0] < 0 || ParitySrc[1] < 0 || ParitySrc[0] == ParitySrc[1]) + return false; + + return true; +} + /// Returns true iff the shuffle node \p N can be replaced with ADDSUB(SUBADD) /// operation. If true is returned then the operands of ADDSUB(SUBADD) operation /// are written to the parameters \p Opnd0 and \p Opnd1. @@ -30444,27 +30492,8 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget, } ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask(); - int ParitySrc[2] = {-1, -1}; - unsigned Size = Mask.size(); - for (unsigned i = 0; i != Size; ++i) { - int M = Mask[i]; - if (M < 0) - continue; - - // Make sure we are using the matching element from the input. - if ((M % Size) != i) - return false; - - // Make sure we use the same input for all elements of the same parity. - int Src = M / Size; - if (ParitySrc[i % 2] >= 0 && ParitySrc[i % 2] != Src) - return false; - ParitySrc[i % 2] = Src; - } - - // Make sure each input is used. - if (ParitySrc[0] < 0 || ParitySrc[1] < 0 || ParitySrc[0] == ParitySrc[1]) + if (!isAddSubOrSubAddMask(Mask, ParitySrc)) return false; // It's a subadd if the vector in the even parity is an FADD. @@ -30476,11 +30505,56 @@ static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget, return true; } +/// Combine shuffle of two fma nodes into FMAddSub or FMSubAdd. +static SDValue combineShuffleToFMAddSub(SDNode *N, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { + // We only handle target-independent shuffles. + // FIXME: It would be easy and harmless to use the target shuffle mask + // extraction tool to support more. + if (N->getOpcode() != ISD::VECTOR_SHUFFLE) + return SDValue(); + + MVT VT = N->getSimpleValueType(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!Subtarget.hasAnyFMA() || !TLI.isTypeLegal(VT)) + return SDValue(); + + // We're trying to match (shuffle fma(a, b, c), X86Fmsub(a, b, c)).
+ SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDValue FMAdd = Op0, FMSub = Op1; + if (FMSub.getOpcode() != X86ISD::FMSUB) + std::swap(FMAdd, FMSub); + + if (FMAdd.getOpcode() != ISD::FMA || FMSub.getOpcode() != X86ISD::FMSUB || + FMAdd.getOperand(0) != FMSub.getOperand(0) || !FMAdd.hasOneUse() || + FMAdd.getOperand(1) != FMSub.getOperand(1) || !FMSub.hasOneUse() || + FMAdd.getOperand(2) != FMSub.getOperand(2)) + return SDValue(); + + // Check for correct shuffle mask. + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask(); + int ParitySrc[2] = {-1, -1}; + if (!isAddSubOrSubAddMask(Mask, ParitySrc)) + return SDValue(); + + // FMAddSub takes zeroth operand from FMSub node. + SDLoc DL(N); + bool IsSubAdd = ParitySrc[0] == 0 ? Op0 == FMAdd : Op1 == FMAdd; + unsigned Opcode = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB; + return DAG.getNode(Opcode, DL, VT, FMAdd.getOperand(0), FMAdd.getOperand(1), + FMAdd.getOperand(2)); +} + /// Try to combine a shuffle into a target-specific add-sub or /// mul-add-sub node. 
static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + if (SDValue V = combineShuffleToFMAddSub(N, Subtarget, DAG)) + return V; + SDValue Opnd0, Opnd1; bool IsSubAdd; if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd)) diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 7a9e9c28329..43875403489 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -23,6 +23,7 @@ enum IntrinsicType : uint16_t { INTR_NO_TYPE, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASS, FPCLASSS, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP, + INTR_TYPE_3OP_RM, CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, CVTPD2PS, CVTPD2PS_MASK, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, @@ -1324,6 +1325,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0), X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0), X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0), + X86_INTRINSIC_DATA(avx512_vfmadd_pd_512, INTR_TYPE_3OP_RM, ISD::FMA, X86ISD::FMADD_RND), + X86_INTRINSIC_DATA(avx512_vfmadd_ps_512, INTR_TYPE_3OP_RM, ISD::FMA, X86ISD::FMADD_RND), + X86_INTRINSIC_DATA(avx512_vfmaddsub_pd_512, INTR_TYPE_3OP_RM, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), + X86_INTRINSIC_DATA(avx512_vfmaddsub_ps_512, INTR_TYPE_3OP_RM, X86ISD::FMADDSUB, + X86ISD::FMADDSUB_RND), X86_INTRINSIC_DATA(avx512_vpdpbusd_128, INTR_TYPE_3OP, X86ISD::VPDPBUSD, 0), X86_INTRINSIC_DATA(avx512_vpdpbusd_256, INTR_TYPE_3OP, X86ISD::VPDPBUSD, 0), |