author     Craig Topper <craig.topper@intel.com>  2019-01-15 20:12:33 +0000
committer  Craig Topper <craig.topper@intel.com>  2019-01-15 20:12:33 +0000
commit     82015b633b5f2cfb31dfa0315c53f34bd0dad6ec (patch)
tree       bab087674135a74c74edd44941a404efac181cb0 /llvm/lib
parent     c9e9e28487b217b4807929f2fcfd8aaa9feddb7b (diff)
[X86] Add versions of the avx512 gather intrinsics that take the mask as a vXi1 vector instead of a scalar
In keeping with our general direction of having the vXi1 type present in IR, this patch converts the mask argument for the avx512 gather intrinsics to vXi1. This can avoid k-register to GPR to k-register transitions late in codegen.

I left the existing intrinsics behind because they have many out-of-tree users such as ISPC. Those users generate their own code and don't go through the autoupgrade path, which only works for bitcode and .ll parsing. Ideally we will get them to migrate to target-independent intrinsics, but it might be easier for them to migrate to these new intrinsics.

I'll work on scatter and gatherpf/scatterpf next.

Differential Revision: https://reviews.llvm.org/D56527

llvm-svn: 351234
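As an illustration of what consuming the new form might look like from C++, here is a minimal sketch that emits the vXi1-mask 512-bit dps gather through IRBuilder. The helper name and the exact operand order (pass-through source, base pointer, index vector, <16 x i1> mask, scale) are assumptions for illustration, not code from this patch; the intrinsic enum follows the usual TableGen mangling of llvm.x86.avx512.mask.gather.dps.512.

// Hypothetical helper, not part of this patch. Operand order is an
// assumption based on the pattern the commit message describes.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static Value *emitGatherDps512(IRBuilder<> &B, Module &M, Value *Src,
                               Value *BasePtr, Value *Index, Value *Mask,
                               Value *Scale) {
  // Src: <16 x float> pass-through; Index: <16 x i32> offsets;
  // Mask: <16 x i1> lane predicate; Scale: i32 constant (1, 2, 4 or 8).
  Function *Gather = Intrinsic::getDeclaration(
      &M, Intrinsic::x86_avx512_mask_gather_dps_512);
  return B.CreateCall(Gather, {Src, BasePtr, Index, Mask, Scale});
}

The benefit of the new form is visible in the Mask parameter: it already arrives as <16 x i1>, so no bitcast from a scalar i16 is needed on the way into codegen.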
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 10
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h | 25
2 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index de11f090b31..12a7998adcf 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22326,16 +22326,20 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
VT.getVectorNumElements());
MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts);
- SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ // We support two versions of the gather intrinsics. One with scalar mask and
+ // one with vXi1 mask. Convert scalar to vXi1 if necessary.
+ if (Mask.getValueType() != MaskVT)
+ Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
// If source is undef or we know it won't be used, use a zero vector
// to break register dependency.
// TODO: use undef instead and let BreakFalseDeps deal with it?
- if (Src.isUndef() || ISD::isBuildVectorAllOnes(VMask.getNode()))
+ if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode()))
Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl);
- SDValue Ops[] = {Src, VMask, Base, Scale, Index, Disp, Segment, Chain};
+ SDValue Ops[] = {Src, Mask, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
return DAG.getMergeValues(RetOps, dl);
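For context, the scalar-to-vXi1 conversion that getMaskNode performs for the legacy intrinsics boils down to a bitcast when the widths line up, since an N-bit scalar mask and a vNi1 value share the same bit pattern. A simplified sketch (not the in-tree helper, which also handles all-ones masks and narrower scalar types):

// Simplified sketch of the legacy scalar-mask path; assumes the scalar
// mask width already equals the number of vector lanes.
static SDValue scalarMaskToVXi1(SDValue Mask, MVT MaskVT, SelectionDAG &DAG) {
  assert(MaskVT.getVectorElementType() == MVT::i1 &&
         MaskVT.getSizeInBits() == Mask.getValueSizeInBits() &&
         "mask width must match the vXi1 type");
  // Same bits, different type: reinterpret the k-mask pattern as vXi1.
  return DAG.getBitcast(MaskVT, Mask);
}

With the new intrinsics the mask already arrives as vXi1, so the Mask.getValueType() != MaskVT guard above skips this step entirely.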
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 890aa76858d..73503a86347 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -115,6 +115,31 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_dps_512, GATHER, X86::VGATHERDPSZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpd_512, GATHER, X86::VGATHERQPDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div2_df, GATHER, X86::VGATHERQPDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div2_di, GATHER, X86::VPGATHERQQZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_df, GATHER, X86::VGATHERQPDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_di, GATHER, X86::VPGATHERQQZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_sf, GATHER, X86::VGATHERQPSZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div4_si, GATHER, X86::VPGATHERQDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div8_sf, GATHER, X86::VGATHERQPSZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3div8_si, GATHER, X86::VPGATHERQDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv2_df, GATHER, X86::VGATHERDPDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv2_di, GATHER, X86::VPGATHERDQZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_df, GATHER, X86::VGATHERDPDZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_di, GATHER, X86::VPGATHERDQZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_sf, GATHER, X86::VGATHERDPSZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv4_si, GATHER, X86::VPGATHERDDZ128rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv8_sf, GATHER, X86::VGATHERDPSZ256rm, 0),
+ X86_INTRINSIC_DATA(avx512_mask_gather3siv8_si, GATHER, X86::VPGATHERDDZ256rm, 0),
+
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
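One reason the new entries are spliced into the table at this particular spot: IntrinsicsWithChain is kept sorted by intrinsic ID so that lookups can binary-search it. A simplified sketch of the lookup shape (assumed; the in-tree header expresses the comparison via an operator< on IntrinsicData rather than a lambda):

#include <algorithm>
#include <iterator>

// Simplified sketch: binary-search the sorted IntrinsicsWithChain table
// for the entry whose Id matches the requested intrinsic number.
static const IntrinsicData *getIntrinsicWithChain(unsigned IntNo) {
  const IntrinsicData *Data = std::lower_bound(
      std::begin(IntrinsicsWithChain), std::end(IntrinsicsWithChain), IntNo,
      [](const IntrinsicData &D, unsigned Id) { return D.Id < Id; });
  if (Data != std::end(IntrinsicsWithChain) && Data->Id == IntNo)
    return Data;
  return nullptr;
}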