diff options
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 28 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 27 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 35 |
3 files changed, 61 insertions, 29 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 131cf24aee2..866d11f62dc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4227,6 +4227,14 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; break; } + case STOREA: + case STOREU: { + Info.ptrVal = I.getArgOperand(0); + Info.memVT = MVT::getVT(I.getArgOperand(1)->getType()); + Info.align = (IntrData->Type == STOREA ? Info.memVT.getSizeInBits()/8 : 1); + Info.writeMem = true; + break; + } default: return false; } @@ -17659,6 +17667,26 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMaskedLoad(VT, dl, Chain, Addr, VMask, PassThru, VT, MemIntr->getMemOperand(), ISD::NON_EXTLOAD); } + case STOREU: + case STOREA: { + SDValue Mask = Op.getOperand(4); + SDValue Data = Op.getOperand(3); + SDValue Addr = Op.getOperand(2); + SDValue Chain = Op.getOperand(0); + + MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op); + assert(MemIntr && "Expected MemIntrinsicSDNode!"); + + if (isAllOnesConstant(Mask)) // return just a store + return DAG.getStore(Chain, dl, Data, Addr, MemIntr->getMemOperand()); + + EVT VT = MemIntr->getMemoryVT(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + + return DAG.getMaskedStore(Chain, dl, Data, Addr, VMask, VT, + MemIntr->getMemOperand(), false); + } } } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 0a369054483..568cc5cd89b 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2707,24 +2707,6 @@ defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0>, avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; -def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src), - GR16:$mask), - (VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), - VR512:$src)>; -def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src), - GR8:$mask), - (VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), - VR512:$src)>; - -def: Pat<(int_x86_avx512_mask_store_ps_512 addr:$ptr, (v16f32 VR512:$src), - GR16:$mask), - (VMOVAPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), - VR512:$src)>; -def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src), - GR8:$mask), - (VMOVAPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), - VR512:$src)>; - defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, @@ -2759,15 +2741,6 @@ def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr, (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)), (VMOVDQU64Zrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>; -def: Pat<(int_x86_avx512_mask_storeu_d_512 addr:$ptr, (v16i32 VR512:$src), - GR16:$mask), - (VMOVDQU32Zmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), - VR512:$src)>; -def: Pat<(int_x86_avx512_mask_storeu_q_512 addr:$ptr, (v8i64 VR512:$src), - GR8:$mask), - (VMOVDQU64Zmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), - VR512:$src)>; - let AddedComplexity = 20 in { def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src), (bc_v8i64 (v16i32 immAllZerosV)))), diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 34ff209a60d..0ce341a1447 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -29,8 +29,9 @@ enum IntrinsicType { INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, - EXPAND_FROM_MEM, LOADA, LOADU, BLEND, INSERT_SUBVEC, - TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK + EXPAND_FROM_MEM, LOADA, LOADU, STOREA, STOREU, BLEND, INSERT_SUBVEC, + TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, + CONVERT_MASK_TO_VEC, CONVERT_TO_MASK }; struct IntrinsicData { @@ -197,6 +198,36 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86ISD::VTRUNC, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), + X86_INTRINSIC_DATA(avx512_mask_store_d_128, STOREA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_store_d_256, STOREA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_store_d_512, STOREA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_store_pd_128, STOREA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_store_pd_256, STOREA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_store_pd_512, STOREA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_store_ps_128, STOREA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_store_ps_256, STOREA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_store_ps_512, STOREA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_store_q_128, STOREA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_store_q_256, STOREA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_store_q_512, STOREA, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_b_128, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_b_256, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_b_512, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_d_128, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_d_256, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_d_512, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_pd_128, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_pd_256, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_pd_512, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_ps_128, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_ps_256, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_ps_512, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_q_128, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_q_256, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_q_512, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_w_128, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_w_256, STOREU, ISD::DELETED_NODE, 0), + X86_INTRINSIC_DATA(avx512_mask_storeu_w_512, STOREU, ISD::DELETED_NODE, 0), X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0), |