-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp        |  13
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h          |   3
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td          |   4
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td   |   2
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h        |  24
-rw-r--r--  llvm/test/CodeGen/X86/avx512-cvt.ll            | 273
-rwxr-xr-x  llvm/test/CodeGen/X86/avx512-schedule.ll       |  42
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-ashr-512.ll |   3

8 files changed, 218 insertions(+), 146 deletions(-)
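The change below folds the dedicated X86ISD::CVT2MASK node into X86ISD::PCMPGTM with an all-zeros first operand, so vector-to-mask conversion is expressed as a signed compare against zero. As a minimal scalar sketch (not part of the patch; both helper names are made up for illustration), the equivalence it relies on is that extracting each element's sign bit yields the same mask as the signed compare "0 > element":

#include <array>
#include <cstdint>

// Illustration only: build a 16-bit mask from a <16 x i32>-style input.
// maskFromSignBits mirrors what CVT2MASK computed (one bit per element MSB);
// maskFromCmpGtZero mirrors PCMPGTM(zero, x), a signed "0 > element" compare.
uint16_t maskFromSignBits(const std::array<int32_t, 16> &V) {
  uint16_t Mask = 0;
  for (unsigned I = 0; I != 16; ++I)
    Mask |= uint16_t((uint32_t(V[I]) >> 31) & 1u) << I; // sign bit of element I
  return Mask;
}

uint16_t maskFromCmpGtZero(const std::array<int32_t, 16> &V) {
  uint16_t Mask = 0;
  for (unsigned I = 0; I != 16; ++I)
    Mask |= uint16_t(0 > V[I] ? 1u : 0u) << I; // signed compare against zero
  return Mask;
}

// The two functions agree on every input, which is why the CVT2MASK patterns,
// intrinsic table entries, and shuffle/truncate/shift lowerings below can be
// rewritten in terms of PCMPGTM without changing the generated code.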
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3f10c978047..7f1969d0c79 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14294,7 +14294,8 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
   int NumElems = VT.getVectorNumElements();
   if ((Subtarget.hasBWI() && (NumElems >= 32)) ||
       (Subtarget.hasDQI() && (NumElems < 32)))
-    return DAG.getNode(X86ISD::CVT2MASK, DL, VT, Shuffle);
+    return DAG.getNode(X86ISD::PCMPGTM, DL, VT, DAG.getConstant(0, DL, ExtVT),
+                       Shuffle);
   return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle);
 }
 
@@ -16494,7 +16495,8 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
                        DAG.getConstant(ShiftInx, DL, ExtVT));
       In = DAG.getBitcast(InVT, In);
     }
-    return DAG.getNode(X86ISD::CVT2MASK, DL, VT, In);
+    return DAG.getNode(X86ISD::PCMPGTM, DL, VT, DAG.getConstant(0, DL, InVT),
+                       In);
   }
   // Use TESTD/Q, extended vector to packed dword/qword.
   assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
@@ -20521,7 +20523,8 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements());
     MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits());
 
-    SDValue CvtMask = DAG.getNode(IntrData->Opc0, dl, MaskVT,
+    SDValue CvtMask = DAG.getNode(X86ISD::PCMPGTM, dl, MaskVT,
+                                  DAG.getConstant(0, dl, SrcVT),
                                   Op.getOperand(1));
     SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT,
                               DAG.getUNDEF(BitcastVT), CvtMask,
@@ -23148,7 +23151,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
       V0 = DAG.getBitcast(VT, V0);
       V1 = DAG.getBitcast(VT, V1);
       Sel = DAG.getBitcast(VT, Sel);
-      Sel = DAG.getNode(X86ISD::CVT2MASK, dl, MaskVT, Sel);
+      Sel = DAG.getNode(X86ISD::PCMPGTM, dl, MaskVT,
+                        DAG.getConstant(0, dl, VT), Sel);
       return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1));
     } else if (Subtarget.hasSSE41()) {
       // On SSE41 targets we make use of the fact that VSELECT lowers
@@ -25272,7 +25276,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
   case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
   case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
-  case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";
   case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
   case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
   case X86ISD::VSHL: return "X86ISD::VSHL";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index c540f29f165..7a981a742f4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -304,9 +304,6 @@ namespace llvm {
       // Vector FP round.
       VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
 
-      // Convert a vector to mask, set bits base on MSB.
-      CVT2MASK,
-
       // 128-bit vector logical left / right shift
       VSHLDQ, VSRLDQ,
 
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b7061bedcf1..e617a5d09de 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -8751,7 +8751,7 @@ defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI
 multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
 def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                    [(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src)))],
+                    [(set _.KRC:$dst, (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src)))],
                     IIC_SSE_MOV_S_RR>, EVEX, Sched<[WriteMove]>;
 }
 
@@ -8759,7 +8759,7 @@ multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string
 
 multiclass convert_vector_to_mask_lowering<X86VectorVTInfo ExtendInfo,
                                            X86VectorVTInfo _> {
-  def : Pat<(_.KVT (X86cvt2mask (_.VT _.RC:$src))),
+  def : Pat<(_.KVT (X86pcmpgtm _.ImmAllZerosV, (_.VT _.RC:$src))),
             (_.KVT (COPY_TO_REGCLASS
                      (!cast<Instruction>(NAME#"Zrr")
                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index ebbef00c01d..63a62ed636a 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -671,8 +671,6 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
                                              SDTCisOpSmallerThanOp<0, 1>,
                                              SDTCisVT<2, i32>]>>;
 
-def X86cvt2mask : SDNode<"X86ISD::CVT2MASK", SDTIntTruncOp>;
-
 // galois field arithmetic
 def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>;
 def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 0782d559874..d14369b7776 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -449,15 +449,15 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
   X86_INTRINSIC_DATA(avx2_psubus_b, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx2_psubus_w, INTR_TYPE_2OP, X86ISD::SUBUS, 0),
-  X86_INTRINSIC_DATA(avx512_cvtb2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
-  X86_INTRINSIC_DATA(avx512_cvtb2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
-  X86_INTRINSIC_DATA(avx512_cvtb2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
-  X86_INTRINSIC_DATA(avx512_cvtd2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
-  X86_INTRINSIC_DATA(avx512_cvtd2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
-  X86_INTRINSIC_DATA(avx512_cvtd2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
-  X86_INTRINSIC_DATA(avx512_cvtq2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
-  X86_INTRINSIC_DATA(avx512_cvtq2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
-  X86_INTRINSIC_DATA(avx512_cvtq2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+  X86_INTRINSIC_DATA(avx512_cvtb2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
+  X86_INTRINSIC_DATA(avx512_cvtb2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
+  X86_INTRINSIC_DATA(avx512_cvtb2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
+  X86_INTRINSIC_DATA(avx512_cvtd2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
+  X86_INTRINSIC_DATA(avx512_cvtd2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
+  X86_INTRINSIC_DATA(avx512_cvtd2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
+  X86_INTRINSIC_DATA(avx512_cvtq2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
+  X86_INTRINSIC_DATA(avx512_cvtq2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
+  X86_INTRINSIC_DATA(avx512_cvtq2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
@@ -472,9 +472,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
   X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
-  X86_INTRINSIC_DATA(avx512_cvtw2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
-  X86_INTRINSIC_DATA(avx512_cvtw2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
-  X86_INTRINSIC_DATA(avx512_cvtw2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
+  X86_INTRINSIC_DATA(avx512_cvtw2mask_128, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
+  X86_INTRINSIC_DATA(avx512_cvtw2mask_256, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
+  X86_INTRINSIC_DATA(avx512_cvtw2mask_512, CONVERT_TO_MASK, X86ISD::PCMPGTM, 0),
   X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
   X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0),
   X86_INTRINSIC_DATA(avx512_kand_w, MASK_BINOP, ISD::AND, 0),
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index 1cedcdb1232..3dede2a8267 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -1402,8 +1402,7 @@ define <16 x float> @sbto16f32(<16 x i32> %a) {
 ;
 ; DQ-LABEL: sbto16f32:
 ; DQ: # %bb.0:
-; DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; DQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
+; DQ-NEXT: vpmovd2m %zmm0, %k0
 ; DQ-NEXT: vpmovm2d %k0, %zmm0
 ; DQ-NEXT: vcvtdq2ps %zmm0, %zmm0
 ; DQ-NEXT: retq
@@ -1889,131 +1888,221 @@ define <16 x float> @usto16f32(<16 x i16> %a) {
 }
 
 define <16 x float> @ubto16f32(<16 x i32> %a) {
-; ALL-LABEL: ubto16f32:
-; ALL: # %bb.0:
-; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; ALL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
-; ALL-NEXT: retq
+; NODQ-LABEL: ubto16f32:
+; NODQ: # %bb.0:
+; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; NODQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
+; NODQ-NEXT: retq
+;
+; DQ-LABEL: ubto16f32:
+; DQ: # %bb.0:
+; DQ-NEXT: vpmovd2m %zmm0, %k1
+; DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; DQ-NEXT: vcvtdq2ps %zmm0, %zmm0
+; DQ-NEXT: retq
   %mask = icmp slt <16 x i32> %a, zeroinitializer
   %1 = uitofp <16 x i1> %mask to <16 x float>
   ret <16 x float> %1
 }
 
 define <16 x double> @ubto16f64(<16 x i32> %a) {
-; NOVL-LABEL: ubto16f64:
-; NOVL: # %bb.0:
-; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVL-NEXT: movl {{.*}}(%rip), %eax
-; NOVL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
-; NOVL-NEXT: vcvtdq2pd %ymm0, %zmm0
-; NOVL-NEXT: kshiftrw $8, %k1, %k1
-; NOVL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
-; NOVL-NEXT: vcvtdq2pd %ymm1, %zmm1
-; NOVL-NEXT: retq
+; NOVLDQ-LABEL: ubto16f64:
+; NOVLDQ: # %bb.0:
+; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; NOVLDQ-NEXT: movl {{.*}}(%rip), %eax
+; NOVLDQ-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
+; NOVLDQ-NEXT: kshiftrw $8, %k1, %k1
+; NOVLDQ-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; NOVLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
+; NOVLDQ-NEXT: retq
 ;
-; VL-LABEL: ubto16f64:
-; VL: # %bb.0:
-; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; VL-NEXT: movl {{.*}}(%rip), %eax
-; VL-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
-; VL-NEXT: vcvtdq2pd %ymm0, %zmm0
-; VL-NEXT: kshiftrw $8, %k1, %k1
-; VL-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z}
-; VL-NEXT: vcvtdq2pd %ymm1, %zmm1
-; VL-NEXT: retq
+; VLDQ-LABEL: ubto16f64:
+; VLDQ: # %bb.0:
+; VLDQ-NEXT: vpmovd2m %zmm0, %k1
+; VLDQ-NEXT: movl {{.*}}(%rip), %eax
+; VLDQ-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
+; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
+; VLDQ-NEXT: kshiftrw $8, %k1, %k1
+; VLDQ-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z}
+; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
+; VLDQ-NEXT: retq
+;
+; VLNODQ-LABEL: ubto16f64:
+; VLNODQ: # %bb.0:
+; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; VLNODQ-NEXT: movl {{.*}}(%rip), %eax
+; VLNODQ-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
+; VLNODQ-NEXT: kshiftrw $8, %k1, %k1
+; VLNODQ-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z}
+; VLNODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
+; VLNODQ-NEXT: retq
+;
+; AVX512DQ-LABEL: ubto16f64:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
+; AVX512DQ-NEXT: movl {{.*}}(%rip), %eax
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
+; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1
+; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512DQ-NEXT: vcvtdq2pd %ymm1, %zmm1
+; AVX512DQ-NEXT: retq
   %mask = icmp slt <16 x i32> %a, zeroinitializer
   %1 = uitofp <16 x i1> %mask to <16 x double>
   ret <16 x double> %1
 }
 
 define <8 x float> @ubto8f32(<8 x i32> %a) {
-; NOVL-LABEL: ubto8f32:
-; NOVL: # %bb.0:
-; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
-; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; NOVL-NEXT: vcvtdq2ps %ymm0, %ymm0
-; NOVL-NEXT: retq
+; NOVLDQ-LABEL: ubto8f32:
+; NOVLDQ: # %bb.0:
+; NOVLDQ-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
+; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; NOVLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
+; NOVLDQ-NEXT: retq
 ;
-; VL-LABEL: ubto8f32:
-; VL: # %bb.0:
-; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VL-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
-; VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
-; VL-NEXT: vcvtdq2ps %ymm0, %ymm0
-; VL-NEXT: retq
+; VLDQ-LABEL: ubto8f32:
+; VLDQ: # %bb.0:
+; VLDQ-NEXT: vpmovd2m %ymm0, %k1
+; VLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; VLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
+; VLDQ-NEXT: retq
+;
+; VLNODQ-LABEL: ubto8f32:
+; VLNODQ: # %bb.0:
+; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
+; VLNODQ-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtdq2ps %ymm0, %ymm0
+; VLNODQ-NEXT: retq
+;
+; AVX512DQ-LABEL: ubto8f32:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
+; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX512DQ-NEXT: retq
   %mask = icmp slt <8 x i32> %a, zeroinitializer
   %1 = uitofp <8 x i1> %mask to <8 x float>
   ret <8 x float> %1
 }
 
 define <8 x double> @ubto8f64(<8 x i32> %a) {
-; NOVL-LABEL: ubto8f64:
-; NOVL: # %bb.0:
-; NOVL-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
-; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; NOVL-NEXT: vcvtdq2pd %ymm0, %zmm0
-; NOVL-NEXT: retq
+; NOVLDQ-LABEL: ubto8f64:
+; NOVLDQ: # %bb.0:
+; NOVLDQ-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
+; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; NOVLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
+; NOVLDQ-NEXT: retq
 ;
-; VL-LABEL: ubto8f64:
-; VL: # %bb.0:
-; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VL-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
-; VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
-; VL-NEXT: vcvtdq2pd %ymm0, %zmm0
-; VL-NEXT: retq
+; VLDQ-LABEL: ubto8f64:
+; VLDQ: # %bb.0:
+; VLDQ-NEXT: vpmovd2m %ymm0, %k1
+; VLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
+; VLDQ-NEXT: retq
+;
+; VLNODQ-LABEL: ubto8f64:
+; VLNODQ: # %bb.0:
+; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
+; VLNODQ-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
+; VLNODQ-NEXT: retq
+;
+; AVX512DQ-LABEL: ubto8f64:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
+; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
+; AVX512DQ-NEXT: retq
   %mask = icmp slt <8 x i32> %a, zeroinitializer
   %1 = uitofp <8 x i1> %mask to <8 x double>
   ret <8 x double> %1
 }
 
 define <4 x float> @ubto4f32(<4 x i32> %a) {
-; NOVL-LABEL: ubto4f32:
-; NOVL: # %bb.0:
-; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
-; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
-; NOVL-NEXT: vzeroupper
-; NOVL-NEXT: retq
+; NOVLDQ-LABEL: ubto4f32:
+; NOVLDQ: # %bb.0:
+; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
+; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; NOVLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
+; NOVLDQ-NEXT: vzeroupper
+; NOVLDQ-NEXT: retq
 ;
-; VL-LABEL: ubto4f32:
-; VL: # %bb.0:
-; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VL-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
-; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
-; VL-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VL-NEXT: retq
+; VLDQ-LABEL: ubto4f32:
+; VLDQ: # %bb.0:
+; VLDQ-NEXT: vpmovd2m %xmm0, %k1
+; VLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
+; VLDQ-NEXT: retq
+;
+; VLNODQ-LABEL: ubto4f32:
+; VLNODQ: # %bb.0:
+; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
+; VLNODQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
+; VLNODQ-NEXT: retq
+;
+; AVX512DQ-LABEL: ubto4f32:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
+; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
   %mask = icmp slt <4 x i32> %a, zeroinitializer
   %1 = uitofp <4 x i1> %mask to <4 x float>
   ret <4 x float> %1
 }
 
 define <4 x double> @ubto4f64(<4 x i32> %a) {
-; NOVL-LABEL: ubto4f64:
-; NOVL: # %bb.0:
-; NOVL-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
-; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0
-; NOVL-NEXT: retq
+; NOVLDQ-LABEL: ubto4f64:
+; NOVLDQ: # %bb.0:
+; NOVLDQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
+; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; NOVLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0
+; NOVLDQ-NEXT: retq
 ;
-; VL-LABEL: ubto4f64:
-; VL: # %bb.0:
-; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VL-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
-; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
-; VL-NEXT: vcvtdq2pd %xmm0, %ymm0
-; VL-NEXT: retq
+; VLDQ-LABEL: ubto4f64:
+; VLDQ: # %bb.0:
+; VLDQ-NEXT: vpmovd2m %xmm0, %k1
+; VLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0
+; VLDQ-NEXT: retq
+;
+; VLNODQ-LABEL: ubto4f64:
+; VLNODQ: # %bb.0:
+; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
+; VLNODQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0
+; VLNODQ-NEXT: retq
+;
+; AVX512DQ-LABEL: ubto4f64:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
+; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %ymm0
+; AVX512DQ-NEXT: retq
   %mask = icmp slt <4 x i32> %a, zeroinitializer
   %1 = uitofp <4 x i1> %mask to <4 x double>
   ret <4 x double> %1
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index af99b86ca5d..48e049fcc5a 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -2359,16 +2359,14 @@ define double @uitof64(i32 %a) nounwind {
 define <16 x float> @sbto16f32(<16 x i32> %a) {
 ; GENERIC-LABEL: sbto16f32:
 ; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
 ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sbto16f32:
 ; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
+; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
 ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
 ; SKX-NEXT: retq # sched: [7:1.00]
@@ -2829,16 +2827,14 @@ define <16 x float> @usto16f32(<16 x i16> %a) {
 define <16 x float> @ubto16f32(<16 x i32> %a) {
 ; GENERIC-LABEL: ubto16f32:
 ; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 # sched: [3:1.00]
+; GENERIC-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ubto16f32:
 ; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 # sched: [3:1.00]
+; SKX-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
 ; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
 ; SKX-NEXT: retq # sched: [7:1.00]
@@ -2850,8 +2846,7 @@ define <16 x float> @ubto16f32(<16 x i32> %a) {
 define <16 x double> @ubto16f64(<16 x i32> %a) {
 ; GENERIC-LABEL: ubto16f64:
 ; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 # sched: [3:1.00]
+; GENERIC-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
 ; GENERIC-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [1:1.00]
 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
@@ -2862,8 +2857,7 @@ define <16 x double> @ubto16f64(<16 x i32> %a) {
 ;
 ; SKX-LABEL: ubto16f64:
 ; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 # sched: [3:1.00]
+; SKX-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
 ; SKX-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [3:1.00]
 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
@@ -2879,16 +2873,14 @@ define <16 x double> @ubto16f64(<16 x i32> %a) {
 define <8 x float> @ubto8f32(<8 x i32> %a) {
 ; GENERIC-LABEL: ubto8f32:
 ; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # sched: [3:1.00]
+; GENERIC-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ubto8f32:
 ; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # sched: [3:1.00]
+; SKX-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
 ; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
 ; SKX-NEXT: retq # sched: [7:1.00]
@@ -2900,16 +2892,14 @@ define <8 x float> @ubto8f32(<8 x i32> %a) {
 define <8 x double> @ubto8f64(<8 x i32> %a) {
 ; GENERIC-LABEL: ubto8f64:
 ; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # sched: [3:1.00]
+; GENERIC-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ubto8f64:
 ; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # sched: [3:1.00]
+; SKX-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
 ; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
 ; SKX-NEXT: retq # sched: [7:1.00]
@@ -2921,16 +2911,14 @@ define <8 x double> @ubto8f64(<8 x i32> %a) {
 define <4 x float> @ubto4f32(<4 x i32> %a) {
 ; GENERIC-LABEL: ubto4f32:
 ; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # sched: [3:1.00]
+; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ubto4f32:
 ; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # sched: [3:1.00]
+; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
 ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
 ; SKX-NEXT: retq # sched: [7:1.00]
@@ -2942,16 +2930,14 @@ define <4 x float> @ubto4f32(<4 x i32> %a) {
 define <4 x double> @ubto4f64(<4 x i32> %a) {
 ; GENERIC-LABEL: ubto4f64:
 ; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # sched: [3:1.00]
+; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
 ; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
 ; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ubto4f64:
 ; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # sched: [3:1.00]
+; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
 ; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
 ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
 ; SKX-NEXT: retq # sched: [7:1.00]
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index 77fb34a95a3..1d5a47b6df9 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -463,8 +463,7 @@ define <64 x i8> @ashr_const7_v64i8(<64 x i8> %a) {
 ;
 ; AVX512BW-LABEL: ashr_const7_v64i8:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpcmpgtb %zmm0, %zmm1, %k0
+; AVX512BW-NEXT: vpmovb2m %zmm0, %k0
 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT: retq
   %res = ashr <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>

