diff options
| author | Asaf Badouh <asaf.badouh@intel.com> | 2015-12-28 08:26:26 +0000 |
|---|---|---|
| committer | Asaf Badouh <asaf.badouh@intel.com> | 2015-12-28 08:26:26 +0000 |
| commit | fba562004bf8a3c383453791bccbac3aaef7bcf0 (patch) | |
| tree | a5369a0e928ac6e6b6379b63b2179ae99e3320a9 /llvm/lib/Target/X86 | |
| parent | 5546f51011f0c271ef984d25ceb80c343d976f45 (diff) | |
| download | bcm5719-llvm-fba562004bf8a3c383453791bccbac3aaef7bcf0.tar.gz bcm5719-llvm-fba562004bf8a3c383453791bccbac3aaef7bcf0.zip | |
[X86][AVX512] Lower broadcast sub vector to vector inrtrinsics
lower broadcast<type>x<vector> to shuffles.
there are two cases:
1.src is 128 bits and dest is 512 bits: in this case we will lower it to shuffle with imm = 0.
2.src is 256 bit and dest is 512 bits: in this case we will lower it to shuffle with imm = 01000100b (0x44) that way we will broadcast the 256bit source: ymm[0,1,2,3] => zmm[0,1,2,3,0,1,2,3] then it will mask it with the passthru value (in case it's mask op).
Differential Revision: http://reviews.llvm.org/D15790
llvm-svn: 256490
Diffstat (limited to 'llvm/lib/Target/X86')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 17 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 26 |
2 files changed, 42 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f8a390cf8f7..182a8cd3e8a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16889,6 +16889,23 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getNode(IntrData->Opc0, dl, VT, VMask); } + case BRCST_SUBVEC_TO_VEC: { + SDValue Src = Op.getOperand(1); + SDValue Passthru = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + EVT resVT = Passthru.getValueType(); + SDValue subVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, resVT, + DAG.getUNDEF(resVT), Src, + DAG.getIntPtrConstant(0, dl)); + SDValue immVal; + if (Src.getSimpleValueType().is256BitVector() && resVT.is512BitVector()) + immVal = DAG.getConstant(0x44, dl, MVT::i8); + else + immVal = DAG.getConstant(0, dl, MVT::i8); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + subVec, subVec, immVal), + Mask, Passthru, Subtarget, DAG); + } default: break; } diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 8acd0c5df86..cb15cc1f3c0 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -27,7 +27,7 @@ enum IntrinsicType { FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, - COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, + COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK @@ -469,12 +469,36 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::SUBV_BROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, INTR_TYPE_1OP_MASK, X86ISD::SUBV_BROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcastf32x4_256, BRCST_SUBVEC_TO_VEC, + X86ISD::SHUF128, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcastf32x4_512, BRCST_SUBVEC_TO_VEC, + X86ISD::SHUF128, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcastf32x8_512, BRCST_SUBVEC_TO_VEC, + X86ISD::SHUF128, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcastf64x2_256, BRCST_SUBVEC_TO_VEC, + X86ISD::SHUF128, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcastf64x2_512, BRCST_SUBVEC_TO_VEC, + X86ISD::SHUF128, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcastf64x4_512, BRCST_SUBVEC_TO_VEC, + X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_128, INTR_TYPE_1OP_MASK, X86ISD::SUBV_BROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_256, INTR_TYPE_1OP_MASK, X86ISD::SUBV_BROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_512, INTR_TYPE_1OP_MASK, X86ISD::SUBV_BROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti32x4_256, BRCST_SUBVEC_TO_VEC, + X86ISD::SHUF128, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti32x4_512, BRCST_SUBVEC_TO_VEC, + X86ISD::SHUF128, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti32x8_512, BRCST_SUBVEC_TO_VEC, + X86ISD::SHUF128, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti64x2_256, BRCST_SUBVEC_TO_VEC, + X86ISD::SHUF128, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti64x2_512, BRCST_SUBVEC_TO_VEC, + X86ISD::SHUF128, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti64x4_512, BRCST_SUBVEC_TO_VEC, + X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_b_128, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_b_256, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_b_512, CMP_MASK_CC, X86ISD::CMPM, 0), |

