diff options
| author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2014-12-11 15:02:24 +0000 |
|---|---|---|
| committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2014-12-11 15:02:24 +0000 |
| commit | 908dbf48c841db435be3c73c8bf9c371990faa86 (patch) | |
| tree | fb16f99dac1e6a29c9862bca30f5e8a431fbbc2c /llvm/lib | |
| parent | 92292898abd38dfc77c1f8b7da5ba6c451a5cae0 (diff) | |
| download | bcm5719-llvm-908dbf48c841db435be3c73c8bf9c371990faa86.tar.gz bcm5719-llvm-908dbf48c841db435be3c73c8bf9c371990faa86.zip | |
AVX-512: Added all forms of COMPRESS instruction
+ intrinsics + tests
llvm-svn: 224019
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 54 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 48 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 56 |
5 files changed, 160 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1c629ed3164..4a086127786 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16957,9 +16957,31 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), Op.getOperand(1), Op.getOperand(2), DAG); case VSHIFT_MASK: - return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), - Op.getOperand(1), Op.getOperand(2), DAG), - Op.getOperand(4), Op.getOperand(3), Subtarget, DAG); + return getVectorMaskingNode(getTargetVShiftNode(IntrData->Opc0, dl, + Op.getSimpleValueType(), + Op.getOperand(1), + Op.getOperand(2), DAG), + Op.getOperand(4), Op.getOperand(3), Subtarget, + DAG); + case COMPRESS_TO_REG: { + SDValue Mask = Op.getOperand(3); + SDValue DataToCompress = Op.getOperand(1); + SDValue PassThru = Op.getOperand(2); + if (isAllOnes(Mask)) // return data as is + return Op.getOperand(1); + EVT VT = Op.getValueType(); + EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + VT.getVectorNumElements()); + EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + Mask.getValueType().getSizeInBits()); + SDLoc dl(Op); + SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, + DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), + DAG.getIntPtrConstant(0)); + + return DAG.getNode(IntrData->Opc0, dl, VT, VMask, DataToCompress, + PassThru); + } default: break; } @@ -17477,6 +17499,31 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, Results.push_back(Store); return DAG.getMergeValues(Results, dl); } + case COMPRESS_TO_MEM: { + SDLoc dl(Op); + SDValue Mask = Op.getOperand(4); + SDValue DataToCompress = Op.getOperand(3); + SDValue Addr = Op.getOperand(2); + SDValue Chain = Op.getOperand(0); + + if (isAllOnes(Mask)) // return just a store + return DAG.getStore(Chain, dl, DataToCompress, Addr, + MachinePointerInfo(), false, false, 0); + + EVT VT = DataToCompress.getValueType(); + EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + VT.getVectorNumElements()); + EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + Mask.getValueType().getSizeInBits()); + SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, + DAG.getNode(ISD::BITCAST, dl, BitcastVT, Mask), + DAG.getIntPtrConstant(0)); + + SDValue Compressed = DAG.getNode(IntrData->Opc0, dl, VT, VMask, + DataToCompress, DAG.getUNDEF(VT)); + return DAG.getStore(Chain, dl, Compressed, Addr, + MachinePointerInfo(), false, false, 0); + } } } @@ -19662,6 +19709,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI"; case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI"; case X86ISD::XTEST: return "X86ISD::XTEST"; + case X86ISD::COMPRESS: return "X86ISD::COMPRESS"; } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 7c6ffa2afa2..b793171e2c5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -379,6 +379,10 @@ namespace llvm { FMADDSUB, FMSUBADD, + // Compress and expand + COMPRESS, + EXPAND, + // Save xmm argument registers to the stack, according to %al. An operator // is needed so that this can be expanded with control flow. VASTART_SAVE_XMM_REGS, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 48287f40f83..b512305d5dd 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -5281,3 +5281,51 @@ multiclass avx512_convert_mask_to_vector<string OpcodeStr> { } defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">; + +//===----------------------------------------------------------------------===// +// AVX-512 - COMPRESS and EXPAND +// +multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _, + string OpcodeStr> { + def rrkz : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst), + (ins _.KRCWM:$mask, _.RC:$src), + OpcodeStr # "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}", + [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, + _.ImmAllZerosV)))]>, EVEX_KZ; + + let Constraints = "$src0 = $dst" in + def rrk : AVX5128I<opc, MRMDestReg, (outs _.RC:$dst), + (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src), + OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}", + [(set _.RC:$dst, (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, + _.RC:$src0)))]>, EVEX_K; + + let mayStore = 1 in { + def mrk : AVX5128I<opc, MRMDestMem, (outs), + (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), + OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}", + [(store (_.VT (X86compress _.KRCWM:$mask, _.RC:$src, undef)), + addr:$dst)]>, + EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; + } +} + +multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo> { + defm Z : compress_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512; + + let Predicates = [HasVLX] in { + defm Z256 : compress_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256; + defm Z128 : compress_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128; + } +} + +defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", avx512vl_i32_info>, + EVEX; +defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", avx512vl_i64_info>, + EVEX, VEX_W; +defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", avx512vl_f32_info>, + EVEX; +defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info>, + EVEX, VEX_W; + diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 5448b9f9131..04de47be08a 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -283,6 +283,10 @@ def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>; def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>; +def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3, + [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, + SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>; + //===----------------------------------------------------------------------===// // SSE Complex Patterns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index bcd55c7df4a..e7ad044d1e5 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -21,7 +21,8 @@ enum IntrinsicType { GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI, - INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_SCALAR_MASK_RM + INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_SCALAR_MASK_RM, + COMPRESS_TO_REG, COMPRESS_TO_MEM }; struct IntrinsicData { @@ -70,6 +71,31 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH, X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm), + X86_INTRINSIC_DATA(avx512_mask_compress_store_d_128, + COMPRESS_TO_MEM, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_store_d_256, + COMPRESS_TO_MEM, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_store_d_512, + COMPRESS_TO_MEM, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_128, + COMPRESS_TO_MEM, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_256, + COMPRESS_TO_MEM, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_store_pd_512, + COMPRESS_TO_MEM, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_128, + COMPRESS_TO_MEM, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_256, + COMPRESS_TO_MEM, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_store_ps_512, + COMPRESS_TO_MEM, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_store_q_128, + COMPRESS_TO_MEM, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_store_q_256, + COMPRESS_TO_MEM, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_store_q_512, + COMPRESS_TO_MEM, X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0), @@ -157,8 +183,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_pmovzxwd, INTR_TYPE_1OP, X86ISD::VZEXT, 0), X86_INTRINSIC_DATA(avx2_pmovzxwq, INTR_TYPE_1OP, X86ISD::VZEXT, 0), X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0), - X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0), - X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0), + X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0), + X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0), X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0), X86_INTRINSIC_DATA(avx2_pshuf_b, INTR_TYPE_2OP, X86ISD::PSHUFB, 0), X86_INTRINSIC_DATA(avx2_psign_b, INTR_TYPE_2OP, X86ISD::PSIGN, 0), @@ -207,6 +233,30 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_TO_REG, + X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_TO_REG, + X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_d_512, COMPRESS_TO_REG, + X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_pd_128, COMPRESS_TO_REG, + X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_pd_256, COMPRESS_TO_REG, + X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_pd_512, COMPRESS_TO_REG, + X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_ps_128, COMPRESS_TO_REG, + X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_ps_256, COMPRESS_TO_REG, + X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_ps_512, COMPRESS_TO_REG, + X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_q_128, COMPRESS_TO_REG, + X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_q_256, COMPRESS_TO_REG, + X86ISD::COMPRESS, 0), + X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_TO_REG, + X86ISD::COMPRESS, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0), X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0), |

