diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 18 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 23 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 93 |
4 files changed, 137 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5782284d46f..fb990e7499e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16528,6 +16528,24 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget DAG.getConstant(X86CC, dl, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + case COMI_RM: { // Comparison intrinsics with Sae + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + SDValue CC = Op.getOperand(3); + SDValue Sae = Op.getOperand(4); + auto ComiType = TranslateX86ConstCondToX86CC(CC); + // choose between ordered and unordered (comi/ucomi) + unsigned comiOp = std::get<0>(ComiType) ? IntrData->Opc0 : IntrData->Opc1; + SDValue Cond; + if (cast<ConstantSDNode>(Sae)->getZExtValue() != + X86::STATIC_ROUNDING::CUR_DIRECTION) + Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae); + else + Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } case VSHIFT: return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), Op.getOperand(1), Op.getOperand(2), DAG); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 1dfd8d4510f..d15d0dc96e6 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -5605,6 +5605,29 @@ let Predicates = [HasAVX512] in { EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; } } + +// Unordered/Ordered scalar fp compare with Sae and set EFLAGS +multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, SDNode OpNode, + string OpcodeStr> { + def rb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), + !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), + [(set EFLAGS, (OpNode (_.VT 
_.RC:$src1), _.RC:$src2, + (i32 FROUND_NO_EXC)))], + IIC_SSE_COMIS_RR>, EVEX, EVEX_B, VEX_LIG, EVEX_V128, + Sched<[WriteFAdd]>; +} + +let Defs = [EFLAGS], Predicates = [HasAVX512] in { + defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, X86ucomiSae, "vucomiss">, + AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; + defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, X86ucomiSae, "vucomisd">, + AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; + defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, X86comiSae, "vcomiss">, + AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; + defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, X86comiSae, "vcomisd">, + AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; +} + let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, "ucomiss">, PS, EVEX, VEX_LIG, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 25f247e9d62..b456460a5bb 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -38,6 +38,8 @@ def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>; def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisFP<1>, SDTCisVT<3, i8>, SDTCisVec<1>]>; +def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>, SDTCisInt<3>]>; def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; @@ -66,7 +68,9 @@ def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; +def X86comiSae : SDNode<"X86ISD::COMI", SDTX86CmpTestSae>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; +def X86ucomiSae: SDNode<"X86ISD::UCOMI", SDTX86CmpTestSae>; def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>; //def X86cmpsd : SDNode<"X86ISD::FSETCCsd", 
SDTX86Cmpsd>; def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD", diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 80e55d04c1f..bb2f7248b0e 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -20,7 +20,7 @@ enum IntrinsicType { INTR_NO_TYPE, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS, FPCLASSS, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP, - CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI, + CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI, COMI_RM, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK, INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK, @@ -1630,6 +1630,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0), + X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI), + X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI), X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0), @@ -1821,6 +1823,95 @@ static void verifyIntrinsicTables() { "Intrinsic data tables should have unique entries"); } +// X86 specific compare constants.
+// They must be kept in sync with avxintrin.h
+#define _X86_CMP_EQ_OQ    0x00 /* Equal (ordered, non-signaling)  */
+#define _X86_CMP_LT_OS    0x01 /* Less-than (ordered, signaling)  */
+#define _X86_CMP_LE_OS    0x02 /* Less-than-or-equal (ordered, signaling)  */
+#define _X86_CMP_UNORD_Q  0x03 /* Unordered (non-signaling)  */
+#define _X86_CMP_NEQ_UQ   0x04 /* Not-equal (unordered, non-signaling)  */
+#define _X86_CMP_NLT_US   0x05 /* Not-less-than (unordered, signaling)  */
+#define _X86_CMP_NLE_US   0x06 /* Not-less-than-or-equal (unordered, signaling)  */
+#define _X86_CMP_ORD_Q    0x07 /* Ordered (non-signaling)   */
+#define _X86_CMP_EQ_UQ    0x08 /* Equal (unordered, non-signaling)  */
+#define _X86_CMP_NGE_US   0x09 /* Not-greater-than-or-equal (unord, signaling)  */
+#define _X86_CMP_NGT_US   0x0a /* Not-greater-than (unordered, signaling)  */
+#define _X86_CMP_FALSE_OQ 0x0b /* False (ordered, non-signaling)  */
+#define _X86_CMP_NEQ_OQ   0x0c /* Not-equal (ordered, non-signaling)  */
+#define _X86_CMP_GE_OS    0x0d /* Greater-than-or-equal (ordered, signaling)  */
+#define _X86_CMP_GT_OS    0x0e /* Greater-than (ordered, signaling)  */
+#define _X86_CMP_TRUE_UQ  0x0f /* True (unordered, non-signaling)  */
+#define _X86_CMP_EQ_OS    0x10 /* Equal (ordered, signaling)  */
+#define _X86_CMP_LT_OQ    0x11 /* Less-than (ordered, non-signaling)  */
+#define _X86_CMP_LE_OQ    0x12 /* Less-than-or-equal (ordered, non-signaling)  */
+#define _X86_CMP_UNORD_S  0x13 /* Unordered (signaling)  */
+#define _X86_CMP_NEQ_US   0x14 /* Not-equal (unordered, signaling)  */
+#define _X86_CMP_NLT_UQ   0x15 /* Not-less-than (unordered, non-signaling)  */
+#define _X86_CMP_NLE_UQ   0x16 /* Not-less-than-or-equal (unord, non-signaling)  */
+#define _X86_CMP_ORD_S    0x17 /* Ordered (signaling)  */
+#define _X86_CMP_EQ_US    0x18 /* Equal (unordered, signaling)  */
+#define _X86_CMP_NGE_UQ   0x19 /* Not-greater-than-or-equal (unord, non-sign)  */
+#define _X86_CMP_NGT_UQ   0x1a /* Not-greater-than (unordered, non-signaling)  */
+#define _X86_CMP_FALSE_OS 0x1b /* False (ordered, signaling)  */
+#define _X86_CMP_NEQ_OS   0x1c /* Not-equal (ordered, signaling)  */
+#define _X86_CMP_GE_OQ    0x1d /* Greater-than-or-equal (ordered, non-signaling)  */
+#define _X86_CMP_GT_OQ    0x1e /* Greater-than (ordered, non-signaling)  */
+#define _X86_CMP_TRUE_US  0x1f /* True (unordered, signaling)  */
+
+/*
+* Get comparison modifier from _mm_comi_round_sd/ss intrinsic.
+*
+* imm must be a ConstantSDNode holding one of the _X86_CMP_* predicate values
+* above. The UNORD, ORD, FALSE and TRUE predicates have no case below and hit
+* llvm_unreachable.
+*
+* Return tuple <isOrdered, X86 condcode>:
+*  - isOrdered is true for the predicates documented as "ordered" and false
+*    for the "unordered" ones. The signaling and non-signaling variants of a
+*    predicate are mapped to the same result.
+*  - X86 condcode is the X86::COND_* value to test on the EFLAGS produced by
+*    the comparison.
+*/
+static std::tuple<bool,unsigned> TranslateX86ConstCondToX86CC(SDValue &imm) {
+  // Use cast<> rather than dyn_cast<>: the result is dereferenced
+  // unconditionally, so a non-constant operand must trip the cast assertion
+  // instead of silently dereferencing a null pointer.
+  unsigned IntImm = cast<ConstantSDNode>(imm)->getZExtValue();
+  // On a floating point condition, the flags are set as follows:
+  //  ZF  PF  CF   op
+  //   0 | 0 | 0 | X > Y
+  //   0 | 0 | 1 | X < Y
+  //   1 | 0 | 0 | X == Y
+  //   1 | 1 | 1 | unordered
+  switch (IntImm) {
+  default: llvm_unreachable("Invalid floating point compare value for Comi!");
+  case _X86_CMP_EQ_OQ:  // 0x00 - Equal (ordered, nonsignaling)
+  case _X86_CMP_EQ_OS:  // 0x10 - Equal (ordered, signaling)
+    return std::make_tuple(true, X86::COND_E);
+  case _X86_CMP_EQ_UQ:  // 0x08 - Equal (unordered, non-signaling)
+  case _X86_CMP_EQ_US:  // 0x18 - Equal (unordered, signaling)
+    return std::make_tuple(false, X86::COND_E);
+  case _X86_CMP_LT_OS:  // 0x01 - Less-than (ordered, signaling)
+  case _X86_CMP_LT_OQ:  // 0x11 - Less-than (ordered, nonsignaling)
+    return std::make_tuple(true, X86::COND_B);
+  case _X86_CMP_NGE_US: // 0x09 - Not-greater-than-or-equal (unordered, signaling)
+  case _X86_CMP_NGE_UQ: // 0x19 - Not-greater-than-or-equal (unordered, nonsignaling)
+    return std::make_tuple(false, X86::COND_B);
+  case _X86_CMP_LE_OS:  // 0x02 - Less-than-or-equal (ordered, signaling)
+  case _X86_CMP_LE_OQ:  // 0x12 - Less-than-or-equal (ordered, nonsignaling)
+    return std::make_tuple(true, X86::COND_BE);
+  case _X86_CMP_NGT_US: // 0x0A - Not-greater-than (unordered, signaling)
+  case _X86_CMP_NGT_UQ: // 0x1A - Not-greater-than (unordered, nonsignaling)
+    return std::make_tuple(false, X86::COND_BE);
+  case _X86_CMP_GT_OS:  // 0x0E - Greater-than (ordered, signaling)
+  case _X86_CMP_GT_OQ:  // 0x1E - Greater-than (ordered, nonsignaling)
+    return std::make_tuple(true, X86::COND_A);
+  case _X86_CMP_NLE_US: // 0x06 - Not-less-than-or-equal (unordered, signaling)
+  case _X86_CMP_NLE_UQ: // 0x16 - Not-less-than-or-equal (unordered, nonsignaling)
+    return std::make_tuple(false, X86::COND_A);
+  case _X86_CMP_GE_OS:  // 0x0D - Greater-than-or-equal (ordered, signaling)
+  case _X86_CMP_GE_OQ:  // 0x1D - Greater-than-or-equal (ordered, nonsignaling)
+    return std::make_tuple(true, X86::COND_AE);
+  case _X86_CMP_NLT_US: // 0x05 - Not-less-than (unordered, signaling)
+  case _X86_CMP_NLT_UQ: // 0x15 - Not-less-than (unordered, nonsignaling)
+    return std::make_tuple(false, X86::COND_AE);
+  case _X86_CMP_NEQ_OQ: // 0x0C - Not-equal (ordered, non-signaling)
+  case _X86_CMP_NEQ_OS: // 0x1C - Not-equal (ordered, signaling)
+    return std::make_tuple(true, X86::COND_NE);
+  case _X86_CMP_NEQ_UQ: // 0x04 - Not-equal (unordered, nonsignaling)
+  case _X86_CMP_NEQ_US: // 0x14 - Not-equal (unordered, signaling)
+    return std::make_tuple(false, X86::COND_NE);
+  }
+}
+
 } // End llvm namespace

 #endif
|