diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/include/llvm/CodeGen/SelectionDAG.h | 44 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 68 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 94 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 169 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 72 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 73 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 20 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics.ll | 18 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 40 |
10 files changed, 445 insertions, 161 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index 4afcd3fd641..a1808b7f55a 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -964,6 +964,14 @@ public: ArrayRef<SDValue> Ops, MachineMemOperand *MMO); SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO); + + /// Return (create a new or find existing) a target-specific node. + /// TargetMemSDNode should be derived class from MemSDNode. + template <class TargetMemSDNode> + SDValue getTargetMemSDNode(SDVTList VTs, ArrayRef<SDValue> Ops, + const SDLoc &dl, EVT MemVT, + MachineMemOperand *MMO); + /// Construct a node to track a Value* through the backend. SDValue getSrcValue(const Value *v); @@ -1418,6 +1426,42 @@ template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> { } }; +template <class TargetMemSDNode> +SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs, + ArrayRef<SDValue> Ops, + const SDLoc &dl, EVT MemVT, + MachineMemOperand *MMO) { + + /// Compose node ID and try to find an existing node. + FoldingSetNodeID ID; + unsigned Opcode = + TargetMemSDNode(dl.getIROrder(), DebugLoc(), VTs, MemVT, MMO).getOpcode(); + ID.AddInteger(Opcode); + ID.AddPointer(VTs.VTs); + for (auto& Op : Ops) { + ID.AddPointer(Op.getNode()); + ID.AddInteger(Op.getResNo()); + } + ID.AddInteger(MemVT.getRawBits()); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + ID.AddInteger(getSyntheticNodeSubclassData<TargetMemSDNode>( + dl.getIROrder(), VTs, MemVT, MMO)); + + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<TargetMemSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + /// Existing node was not found. Create a new one. + auto *N = newSDNode<TargetMemSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + MemVT, MMO); + createOperands(N, Ops); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + } // end namespace llvm #endif diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b9f506deeb6..a25514f28ef 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19565,6 +19565,33 @@ static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) { return Chain; } +/// Emit Truncating Store with signed or unsigned saturation. +static SDValue +EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val, + SDValue Ptr, EVT MemVT, MachineMemOperand *MMO, + SelectionDAG &DAG) { + + SDVTList VTs = DAG.getVTList(MVT::Other); + SDValue Undef = DAG.getUNDEF(Ptr.getValueType()); + SDValue Ops[] = { Chain, Val, Ptr, Undef }; + return SignedSat ? + DAG.getTargetMemSDNode<TruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) : + DAG.getTargetMemSDNode<TruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO); +} + +/// Emit Masked Truncating Store with signed or unsigned saturation. +static SDValue +EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, + SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, SelectionDAG &DAG) { + + SDVTList VTs = DAG.getVTList(MVT::Other); + SDValue Ops[] = { Chain, Ptr, Mask, Val }; + return SignedSat ? + DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) : + DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO); +} + static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, SelectionDAG &DAG) { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); @@ -19723,18 +19750,39 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op); assert(MemIntr && "Expected MemIntrinsicSDNode!"); - EVT VT = MemIntr->getMemoryVT(); + EVT MemVT = MemIntr->getMemoryVT(); - if (isAllOnesConstant(Mask)) // return just a truncate store - return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, VT, - MemIntr->getMemOperand()); + uint16_t TruncationOp = IntrData->Opc0; + switch (TruncationOp) { + case X86ISD::VTRUNC: { + if (isAllOnesConstant(Mask)) // return just a truncate store + return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT, + MemIntr->getMemOperand()); - MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); - SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements()); + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + + return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT, + MemIntr->getMemOperand(), true /* truncating */); + } + case X86ISD::VTRUNCUS: + case X86ISD::VTRUNCS: { + bool IsSigned = (TruncationOp == X86ISD::VTRUNCS); + if (isAllOnesConstant(Mask)) + return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT, + MemIntr->getMemOperand(), DAG); + + MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements()); + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); - return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, VT, - MemIntr->getMemOperand(), true /* truncating */); + return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, + VMask, MemVT, MemIntr->getMemOperand(), DAG); + } + default: + llvm_unreachable("Unsupported truncstore intrinsic"); + } } + case EXPAND_FROM_MEM: { SDValue Mask = Op.getOperand(4); SDValue PassThru = Op.getOperand(3); @@ -23470,6 +23518,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VTRUNC: return "X86ISD::VTRUNC"; case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS"; case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS"; + case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES"; + case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS"; + case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES"; + case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS"; case X86ISD::VINSERT: return "X86ISD::VINSERT"; case X86ISD::VFPEXT: return "X86ISD::VFPEXT"; case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 7ba9832135d..ceba14ed654 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -606,7 +606,12 @@ namespace llvm { /// This instruction grabs the address of the next argument /// from a va_list. (reads and modifies the va_list in memory) - VAARG_64 + VAARG_64, + + // Vector truncating store with unsigned/signed saturation + VTRUNCSTOREUS, VTRUNCSTORES, + // Vector truncating masked store with unsigned/signed saturation + VMTRUNCSTOREUS, VMTRUNCSTORES // WARNING: Do not add anything in the end unless you want the node to // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all @@ -1289,6 +1294,93 @@ namespace llvm { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); } // end namespace X86 + + // Base class for all X86 non-masked store operations. + class X86StoreSDNode : public MemSDNode { + public: + X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, + SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO) + :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {} + const SDValue &getValue() const { return getOperand(1); } + const SDValue &getBasePtr() const { return getOperand(2); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == X86ISD::VTRUNCSTORES || + N->getOpcode() == X86ISD::VTRUNCSTOREUS; + } + }; + + // Base class for all X86 masked store operations. + // The class has the same order of operands as MaskedStoreSDNode for + // convenience. + class X86MaskedStoreSDNode : public MemSDNode { + public: + X86MaskedStoreSDNode(unsigned Opcode, unsigned Order, + const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO) + : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {} + + const SDValue &getBasePtr() const { return getOperand(1); } + const SDValue &getMask() const { return getOperand(2); } + const SDValue &getValue() const { return getOperand(3); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == X86ISD::VMTRUNCSTORES || + N->getOpcode() == X86ISD::VMTRUNCSTOREUS; + } + }; + + // X86 Truncating Store with Signed saturation. + class TruncSStoreSDNode : public X86StoreSDNode { + public: + TruncSStoreSDNode(unsigned Order, const DebugLoc &dl, + SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) + : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {} + + static bool classof(const SDNode *N) { + return N->getOpcode() == X86ISD::VTRUNCSTORES; + } + }; + + // X86 Truncating Store with Unsigned saturation. + class TruncUSStoreSDNode : public X86StoreSDNode { + public: + TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl, + SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) + : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {} + + static bool classof(const SDNode *N) { + return N->getOpcode() == X86ISD::VTRUNCSTOREUS; + } + }; + + // X86 Truncating Masked Store with Signed saturation. + class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode { + public: + MaskedTruncSStoreSDNode(unsigned Order, + const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO) + : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {} + + static bool classof(const SDNode *N) { + return N->getOpcode() == X86ISD::VMTRUNCSTORES; + } + }; + + // X86 Truncating Masked Store with Unsigned saturation. + class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode { + public: + MaskedTruncUSStoreSDNode(unsigned Order, + const DebugLoc &dl, SDVTList VTs, EVT MemVT, + MachineMemOperand *MMO) + : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {} + + static bool classof(const SDNode *N) { + return N->getOpcode() == X86ISD::VMTRUNCSTOREUS; + } + }; + } // end namespace llvm #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index ad95ce9cc38..534068f9a13 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -7310,23 +7310,6 @@ multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo, addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; } -multiclass avx512_trunc_sat_mr_lowering<X86VectorVTInfo SrcInfo, - X86VectorVTInfo DestInfo, string sat > { - - def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# - DestInfo.Suffix#"_mem_"#SrcInfo.Size) - addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask), - (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr, - (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM), - (SrcInfo.VT SrcInfo.RC:$src))>; - - def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# - DestInfo.Suffix#"_mem_"#SrcInfo.Size) - addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1), - (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr) addr:$ptr, - (SrcInfo.VT SrcInfo.RC:$src))>; -} - multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode, AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, @@ -7352,119 +7335,89 @@ multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode, truncFrag, mtruncFrag>, EVEX_V512; } -multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, - X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, - X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, - X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{ - - let Predicates = [HasVLX, prd] in { - defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128, - DestInfoZ128, x86memopZ128>, - avx512_trunc_sat_mr_lowering<VTSrcInfo.info128, DestInfoZ128, - sat>, EVEX_V128; - - defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256, - DestInfoZ256, x86memopZ256>, - avx512_trunc_sat_mr_lowering<VTSrcInfo.info256, DestInfoZ256, - sat>, EVEX_V256; - } - let Predicates = [prd] in - defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512, - DestInfoZ, x86memopZ>, - avx512_trunc_sat_mr_lowering<VTSrcInfo.info512, DestInfoZ, - sat>, EVEX_V512; -} - -multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode> { +multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode, + PatFrag StoreNode, PatFrag MaskedStoreNode> { defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info, v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem, - truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VO>; -} -multiclass avx512_trunc_sat_qb<bits<8> opc, string sat, SDNode OpNode> { - defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qb", OpNode, avx512vl_i64_info, - v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem, - sat>, EVEX_CD8<8, CD8VO>; + StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>; } -multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode> { +multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode, + PatFrag StoreNode, PatFrag MaskedStoreNode> { defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info, v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem, - truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VQ>; -} -multiclass avx512_trunc_sat_qw<bits<8> opc, string sat, SDNode OpNode> { - defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qw", OpNode, avx512vl_i64_info, - v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem, - sat>, EVEX_CD8<16, CD8VQ>; + StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>; } -multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode> { +multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode, + PatFrag StoreNode, PatFrag MaskedStoreNode> { defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info, v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem, - truncstorevi32, masked_truncstorevi32>, EVEX_CD8<32, CD8VH>; -} -multiclass avx512_trunc_sat_qd<bits<8> opc, string sat, SDNode OpNode> { - defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qd", OpNode, avx512vl_i64_info, - v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem, - sat>, EVEX_CD8<32, CD8VH>; + StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>; } -multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode> { +multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode, + PatFrag StoreNode, PatFrag MaskedStoreNode> { defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info, v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem, - truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VQ>; -} -multiclass avx512_trunc_sat_db<bits<8> opc, string sat, SDNode OpNode> { - defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"db", OpNode, avx512vl_i32_info, - v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem, - sat>, EVEX_CD8<8, CD8VQ>; + StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>; } -multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode> { +multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode, + PatFrag StoreNode, PatFrag MaskedStoreNode> { defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info, v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem, - truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VH>; -} -multiclass avx512_trunc_sat_dw<bits<8> opc, string sat, SDNode OpNode> { - defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"dw", OpNode, avx512vl_i32_info, - v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem, - sat>, EVEX_CD8<16, CD8VH>; + StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>; } -multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode> { +multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode, + PatFrag StoreNode, PatFrag MaskedStoreNode> { defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info, v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem, - truncstorevi8, masked_truncstorevi8,HasBWI>, EVEX_CD8<16, CD8VH>; -} -multiclass avx512_trunc_sat_wb<bits<8> opc, string sat, SDNode OpNode> { - defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"wb", OpNode, avx512vl_i16_info, - v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem, - sat, HasBWI>, EVEX_CD8<16, CD8VH>; -} - -defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>; -defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>; -defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>; - -defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>; -defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>; -defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>; - -defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>; -defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>; -defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>; - -defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>; -defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>; -defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>; - -defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>; -defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>; -defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>; - -defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>; -defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>; -defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>; + StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>; +} + +defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc, + truncstorevi8, masked_truncstorevi8>; +defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, + truncstore_s_vi8, masked_truncstore_s_vi8>; +defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, + truncstore_us_vi8, masked_truncstore_us_vi8>; + +defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc, + truncstorevi16, masked_truncstorevi16>; +defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, + truncstore_s_vi16, masked_truncstore_s_vi16>; +defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, + truncstore_us_vi16, masked_truncstore_us_vi16>; + +defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc, + truncstorevi32, masked_truncstorevi32>; +defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, + truncstore_s_vi32, masked_truncstore_s_vi32>; +defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, + truncstore_us_vi32, masked_truncstore_us_vi32>; + +defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc, + truncstorevi8, masked_truncstorevi8>; +defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, + truncstore_s_vi8, masked_truncstore_s_vi8>; +defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, + truncstore_us_vi8, masked_truncstore_us_vi8>; + +defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc, + truncstorevi16, masked_truncstorevi16>; +defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, + truncstore_s_vi16, masked_truncstore_s_vi16>; +defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, + truncstore_us_vi16, masked_truncstore_us_vi16>; + +defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc, + truncstorevi8, masked_truncstorevi8>; +defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, + truncstore_s_vi8, masked_truncstore_s_vi8>; +defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, + truncstore_us_vi8, masked_truncstore_us_vi8>; let Predicates = [HasAVX512, NoVLX] in { def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))), diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 1973684d2ab..c5689d7c698 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -1021,6 +1021,78 @@ def masked_truncstorevi32 : return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; }]>; +def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES", SDTStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS", SDTStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTMaskedStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTMaskedStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr), + (X86TruncSStore node:$val, node:$ptr), [{ + return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; + +def truncstore_us_vi8 : PatFrag<(ops node:$val, node:$ptr), + (X86TruncUSStore node:$val, node:$ptr), [{ + return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; + +def truncstore_s_vi16 : PatFrag<(ops node:$val, node:$ptr), + (X86TruncSStore node:$val, node:$ptr), [{ + return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; + +def truncstore_us_vi16 : PatFrag<(ops node:$val, node:$ptr), + (X86TruncUSStore node:$val, node:$ptr), [{ + return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; + +def truncstore_s_vi32 : PatFrag<(ops node:$val, node:$ptr), + (X86TruncSStore node:$val, node:$ptr), [{ + return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; + +def truncstore_us_vi32 : PatFrag<(ops node:$val, node:$ptr), + (X86TruncUSStore node:$val, node:$ptr), [{ + return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; + +def masked_truncstore_s_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; + +def masked_truncstore_us_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; + +def masked_truncstore_s_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; + +def masked_truncstore_us_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; + +def masked_truncstore_s_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; + +def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; + def assertzext_i1 : PatFrag<(ops node:$src), (assertzext node:$src), [{ return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i1; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 0675f896911..8d58d2c229d 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -185,6 +185,79 @@ static const IntrinsicData IntrinsicsWithChain[] = { X86ISD::VTRUNC, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_128, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_256, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_512, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_128, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_256, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_512, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_128, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_256, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_512, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_128, TRUNCATE_TO_MEM_VI32, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_256, TRUNCATE_TO_MEM_VI32, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_512, TRUNCATE_TO_MEM_VI32, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_128, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_256, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_512, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_128, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_256, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_512, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_128, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_256, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_512, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_128, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_256, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_512, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_128, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_256, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_512, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_128, TRUNCATE_TO_MEM_VI32, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_256, TRUNCATE_TO_MEM_VI32, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_512, TRUNCATE_TO_MEM_VI32, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_128, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_256, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_512, TRUNCATE_TO_MEM_VI16, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_128, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_256, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_512, TRUNCATE_TO_MEM_VI8, + X86ISD::VTRUNCUS, 0), + X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0), X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0), diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 6eedd264ada..2920125cbcf 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -2912,8 +2912,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8) define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) ; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) ; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) @@ -2946,8 +2946,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8) define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) ; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) ; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) @@ -3014,8 +3014,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8) define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) ; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) ; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) @@ -3048,8 +3048,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8) define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) ; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) ; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) @@ -3116,8 +3116,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8) define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) ; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) ; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) @@ -3150,8 +3150,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8) define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) ; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) ; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1) @@ -3218,8 +3218,8 @@ declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16) define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) ; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) ; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) @@ -3252,8 +3252,8 @@ declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16) define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) ; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) ; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) @@ -3320,8 +3320,8 @@ declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16) define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) ; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) ; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) @@ -3354,8 +3354,8 @@ declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16) define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) ; CHECK-NEXT: kmovw %esi, %k1 +; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) ; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) {%k1} ; CHECK-NEXT: retq call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1) diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll index 05b59aff9b5..d9a9a2d655b 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -2010,18 +2010,17 @@ declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32) define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) ; AVX512BW-NEXT: kmovd %esi, %k1 +; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) ; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: retq ; ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: ; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) {%k1} +; AVX512F-32-NEXT: vpmovswb %zmm0, (%eax) +; AVX512F-32-NEXT: vpmovswb %zmm0, (%eax) {%k1} ; AVX512F-32-NEXT: retl call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) @@ -2063,18 +2062,17 @@ declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32) define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: ; AVX512BW: ## BB#0: -; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) ; AVX512BW-NEXT: kmovd %esi, %k1 +; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) ; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: retq ; ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: ; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) -; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) {%k1} +; AVX512F-32-NEXT: vpmovuswb %zmm0, (%eax) +; AVX512F-32-NEXT: vpmovuswb %zmm0, (%eax) {%k1} ; AVX512F-32-NEXT: retl call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 5eacf2279f2..3a56de61e5e 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -4090,8 +4090,8 @@ declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8) define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07] ; CHECK-NEXT: vpmovswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) @@ -4124,8 +4124,8 @@ declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8) define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07] ; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1) @@ -4192,8 +4192,8 @@ declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16) define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07] ; CHECK-NEXT: vpmovswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) @@ -4226,8 +4226,8 @@ declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16) define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07] ; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1) diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index a99c8641d9c..90b9fc2192f 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -2040,8 +2040,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8) define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovsqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07] ; CHECK-NEXT: vpmovsqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x22,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) @@ -2074,8 +2074,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8) define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovusqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07] ; CHECK-NEXT: vpmovusqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x12,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) @@ -2142,8 +2142,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8) define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovsqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07] ; CHECK-NEXT: vpmovsqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x22,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) @@ -2176,8 +2176,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8) define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovusqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07] ; CHECK-NEXT: vpmovusqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x12,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) @@ -2244,8 +2244,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8) define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovsqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07] ; CHECK-NEXT: vpmovsqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x24,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) @@ -2278,8 +2278,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8) define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovusqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07] ; CHECK-NEXT: vpmovusqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x14,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) @@ -2346,8 +2346,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8) define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovsqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07] ; CHECK-NEXT: vpmovsqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x24,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) @@ -2380,8 +2380,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8) define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovusqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07] ; CHECK-NEXT: vpmovusqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x14,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) @@ -2448,8 +2448,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8) define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovsqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07] ; CHECK-NEXT: vpmovsqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x25,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) @@ -2482,8 +2482,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8) define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovusqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07] ; CHECK-NEXT: vpmovusqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x15,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1) @@ -2550,8 +2550,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8) define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovsqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07] ; CHECK-NEXT: vpmovsqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x25,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) @@ -2584,8 +2584,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8) define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovusqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07] ; CHECK-NEXT: vpmovusqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x15,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1) @@ -2652,8 +2652,8 @@ declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8) define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovsdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07] ; CHECK-NEXT: vpmovsdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x21,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) @@ -2686,8 +2686,8 @@ declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8) define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovusdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07] ; CHECK-NEXT: vpmovusdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x11,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) @@ -2754,8 +2754,8 @@ declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8) define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovsdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07] ; CHECK-NEXT: vpmovsdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x21,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) @@ -2788,8 +2788,8 @@ declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8) define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovusdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07] ; CHECK-NEXT: vpmovusdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x11,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) @@ -2856,8 +2856,8 @@ declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8) define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovsdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07] ; CHECK-NEXT: vpmovsdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x23,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) @@ -2890,8 +2890,8 @@ declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8) define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovusdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07] ; CHECK-NEXT: vpmovusdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x13,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1) @@ -2958,8 +2958,8 @@ declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8) define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovsdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovsdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07] ; CHECK-NEXT: vpmovsdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x23,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) @@ -2992,8 +2992,8 @@ declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8) define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpmovusdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07] ; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] +; CHECK-NEXT: vpmovusdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07] ; CHECK-NEXT: vpmovusdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x13,0x07] ; CHECK-NEXT: retq ## encoding: [0xc3] call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1) |

