diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 25 |
2 files changed, 32 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index de11f090b31..12a7998adcf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -22326,16 +22326,20 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, VT.getVectorNumElements()); MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); - SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + // We support two versions of the gather intrinsics. One with scalar mask and + // one with vXi1 mask. Convert scalar to vXi1 if necessary. + if (Mask.getValueType() != MaskVT) + Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); // If source is undef or we know it won't be used, use a zero vector // to break register dependency. // TODO: use undef instead and let BreakFalseDeps deal with it? - if (Src.isUndef() || ISD::isBuildVectorAllOnes(VMask.getNode())) + if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode())) Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); - SDValue Ops[] = {Src, VMask, Base, Scale, Index, Disp, Segment, Chain}; + SDValue Ops[] = {Src, Mask, Base, Scale, Index, Disp, Segment, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; return DAG.getMergeValues(RetOps, dl); diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 890aa76858d..73503a86347 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -115,6 +115,31 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH, X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm), + X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0), + X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0), + X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8, |

