summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAG.h44
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp68
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h94
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td169
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td72
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h73
-rw-r--r--llvm/test/CodeGen/X86/avx512-intrinsics.ll20
-rw-r--r--llvm/test/CodeGen/X86/avx512bw-intrinsics.ll18
-rw-r--r--llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll8
-rw-r--r--llvm/test/CodeGen/X86/avx512vl-intrinsics.ll40
10 files changed, 445 insertions, 161 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 4afcd3fd641..a1808b7f55a 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -964,6 +964,14 @@ public:
ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
+
+ /// Return (create a new or find existing) a target-specific node.
+ /// TargetMemSDNode should be derived class from MemSDNode.
+ template <class TargetMemSDNode>
+ SDValue getTargetMemSDNode(SDVTList VTs, ArrayRef<SDValue> Ops,
+ const SDLoc &dl, EVT MemVT,
+ MachineMemOperand *MMO);
+
/// Construct a node to track a Value* through the backend.
SDValue getSrcValue(const Value *v);
@@ -1418,6 +1426,42 @@ template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> {
}
};
+template <class TargetMemSDNode>
+SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs,
+ ArrayRef<SDValue> Ops,
+ const SDLoc &dl, EVT MemVT,
+ MachineMemOperand *MMO) {
+
+ /// Compose node ID and try to find an existing node.
+ FoldingSetNodeID ID;
+ unsigned Opcode =
+ TargetMemSDNode(dl.getIROrder(), DebugLoc(), VTs, MemVT, MMO).getOpcode();
+ ID.AddInteger(Opcode);
+ ID.AddPointer(VTs.VTs);
+ for (auto& Op : Ops) {
+ ID.AddPointer(Op.getNode());
+ ID.AddInteger(Op.getResNo());
+ }
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(getSyntheticNodeSubclassData<TargetMemSDNode>(
+ dl.getIROrder(), VTs, MemVT, MMO));
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<TargetMemSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ /// Existing node was not found. Create a new one.
+ auto *N = newSDNode<TargetMemSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ MemVT, MMO);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
} // end namespace llvm
#endif
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b9f506deeb6..a25514f28ef 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19565,6 +19565,33 @@ static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) {
return Chain;
}
+/// Emit Truncating Store with signed or unsigned saturation.
+static SDValue
+EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val,
+ SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
+ SelectionDAG &DAG) {
+
+ SDVTList VTs = DAG.getVTList(MVT::Other);
+ SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ return SignedSat ?
+ DAG.getTargetMemSDNode<TruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
+ DAG.getTargetMemSDNode<TruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
+}
+
+/// Emit Masked Truncating Store with signed or unsigned saturation.
+static SDValue
+EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
+ SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO, SelectionDAG &DAG) {
+
+ SDVTList VTs = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Mask, Val };
+ return SignedSat ?
+ DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
+ DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
+}
+
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -19723,18 +19750,39 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
- EVT VT = MemIntr->getMemoryVT();
+ EVT MemVT = MemIntr->getMemoryVT();
- if (isAllOnesConstant(Mask)) // return just a truncate store
- return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, VT,
- MemIntr->getMemOperand());
+ uint16_t TruncationOp = IntrData->Opc0;
+ switch (TruncationOp) {
+ case X86ISD::VTRUNC: {
+ if (isAllOnesConstant(Mask)) // return just a truncate store
+ return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT,
+ MemIntr->getMemOperand());
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
- SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
+ return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT,
+ MemIntr->getMemOperand(), true /* truncating */);
+ }
+ case X86ISD::VTRUNCUS:
+ case X86ISD::VTRUNCS: {
+ bool IsSigned = (TruncationOp == X86ISD::VTRUNCS);
+ if (isAllOnesConstant(Mask))
+ return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT,
+ MemIntr->getMemOperand(), DAG);
+
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
- return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, VT,
- MemIntr->getMemOperand(), true /* truncating */);
+ return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr,
+ VMask, MemVT, MemIntr->getMemOperand(), DAG);
+ }
+ default:
+ llvm_unreachable("Unsupported truncstore intrinsic");
+ }
}
+
case EXPAND_FROM_MEM: {
SDValue Mask = Op.getOperand(4);
SDValue PassThru = Op.getOperand(3);
@@ -23470,6 +23518,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
+ case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES";
+ case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
+ case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
+ case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 7ba9832135d..ceba14ed654 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -606,7 +606,12 @@ namespace llvm {
/// This instruction grabs the address of the next argument
/// from a va_list. (reads and modifies the va_list in memory)
- VAARG_64
+ VAARG_64,
+
+ // Vector truncating store with unsigned/signed saturation
+ VTRUNCSTOREUS, VTRUNCSTORES,
+ // Vector truncating masked store with unsigned/signed saturation
+ VMTRUNCSTOREUS, VMTRUNCSTORES
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
@@ -1289,6 +1294,93 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
} // end namespace X86
+
+ // Base class for all X86 non-masked store operations.
+ class X86StoreSDNode : public MemSDNode {
+ public:
+ X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO)
+ :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
+ const SDValue &getValue() const { return getOperand(1); }
+ const SDValue &getBasePtr() const { return getOperand(2); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::VTRUNCSTORES ||
+ N->getOpcode() == X86ISD::VTRUNCSTOREUS;
+ }
+ };
+
+ // Base class for all X86 masked store operations.
+ // The class has the same order of operands as MaskedStoreSDNode for
+ // convenience.
+ class X86MaskedStoreSDNode : public MemSDNode {
+ public:
+ X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
+ const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO)
+ : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
+
+ const SDValue &getBasePtr() const { return getOperand(1); }
+ const SDValue &getMask() const { return getOperand(2); }
+ const SDValue &getValue() const { return getOperand(3); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
+ N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
+ }
+ };
+
+ // X86 Truncating Store with Signed saturation.
+ class TruncSStoreSDNode : public X86StoreSDNode {
+ public:
+ TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+ : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::VTRUNCSTORES;
+ }
+ };
+
+ // X86 Truncating Store with Unsigned saturation.
+ class TruncUSStoreSDNode : public X86StoreSDNode {
+ public:
+ TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+ : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
+ }
+ };
+
+ // X86 Truncating Masked Store with Signed saturation.
+ class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
+ public:
+ MaskedTruncSStoreSDNode(unsigned Order,
+ const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO)
+ : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::VMTRUNCSTORES;
+ }
+ };
+
+ // X86 Truncating Masked Store with Unsigned saturation.
+ class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
+ public:
+ MaskedTruncUSStoreSDNode(unsigned Order,
+ const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO)
+ : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
+ }
+ };
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index ad95ce9cc38..534068f9a13 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7310,23 +7310,6 @@ multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
-multiclass avx512_trunc_sat_mr_lowering<X86VectorVTInfo SrcInfo,
- X86VectorVTInfo DestInfo, string sat > {
-
- def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
- DestInfo.Suffix#"_mem_"#SrcInfo.Size)
- addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask),
- (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr,
- (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM),
- (SrcInfo.VT SrcInfo.RC:$src))>;
-
- def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
- DestInfo.Suffix#"_mem_"#SrcInfo.Size)
- addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1),
- (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr) addr:$ptr,
- (SrcInfo.VT SrcInfo.RC:$src))>;
-}
-
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
@@ -7352,119 +7335,89 @@ multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
truncFrag, mtruncFrag>, EVEX_V512;
}
-multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr, SDNode OpNode,
- AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
- X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
- X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
- X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{
-
- let Predicates = [HasVLX, prd] in {
- defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
- DestInfoZ128, x86memopZ128>,
- avx512_trunc_sat_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
- sat>, EVEX_V128;
-
- defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
- DestInfoZ256, x86memopZ256>,
- avx512_trunc_sat_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
- sat>, EVEX_V256;
- }
- let Predicates = [prd] in
- defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
- DestInfoZ, x86memopZ>,
- avx512_trunc_sat_mr_lowering<VTSrcInfo.info512, DestInfoZ,
- sat>, EVEX_V512;
-}
-
-multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
- truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VO>;
-}
-multiclass avx512_trunc_sat_qb<bits<8> opc, string sat, SDNode OpNode> {
- defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qb", OpNode, avx512vl_i64_info,
- v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
- sat>, EVEX_CD8<8, CD8VO>;
+ StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
-multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
- truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VQ>;
-}
-multiclass avx512_trunc_sat_qw<bits<8> opc, string sat, SDNode OpNode> {
- defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qw", OpNode, avx512vl_i64_info,
- v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
- sat>, EVEX_CD8<16, CD8VQ>;
+ StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
-multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
- truncstorevi32, masked_truncstorevi32>, EVEX_CD8<32, CD8VH>;
-}
-multiclass avx512_trunc_sat_qd<bits<8> opc, string sat, SDNode OpNode> {
- defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qd", OpNode, avx512vl_i64_info,
- v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
- sat>, EVEX_CD8<32, CD8VH>;
+ StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
-multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
- truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VQ>;
-}
-multiclass avx512_trunc_sat_db<bits<8> opc, string sat, SDNode OpNode> {
- defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"db", OpNode, avx512vl_i32_info,
- v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
- sat>, EVEX_CD8<8, CD8VQ>;
+ StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
-multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
- truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VH>;
-}
-multiclass avx512_trunc_sat_dw<bits<8> opc, string sat, SDNode OpNode> {
- defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"dw", OpNode, avx512vl_i32_info,
- v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
- sat>, EVEX_CD8<16, CD8VH>;
+ StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
-multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
- truncstorevi8, masked_truncstorevi8,HasBWI>, EVEX_CD8<16, CD8VH>;
-}
-multiclass avx512_trunc_sat_wb<bits<8> opc, string sat, SDNode OpNode> {
- defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"wb", OpNode, avx512vl_i16_info,
- v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
- sat, HasBWI>, EVEX_CD8<16, CD8VH>;
-}
-
-defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>;
-defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>;
-defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>;
-
-defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>;
-defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>;
-defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>;
-
-defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>;
-defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>;
-defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>;
-
-defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>;
-defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>;
-defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>;
-
-defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>;
-defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>;
-defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>;
-
-defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>;
-defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>;
-defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>;
+ StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
+}
+
+defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc,
+ truncstorevi8, masked_truncstorevi8>;
+defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs,
+ truncstore_s_vi8, masked_truncstore_s_vi8>;
+defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
+ truncstore_us_vi8, masked_truncstore_us_vi8>;
+
+defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc,
+ truncstorevi16, masked_truncstorevi16>;
+defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs,
+ truncstore_s_vi16, masked_truncstore_s_vi16>;
+defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
+ truncstore_us_vi16, masked_truncstore_us_vi16>;
+
+defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc,
+ truncstorevi32, masked_truncstorevi32>;
+defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs,
+ truncstore_s_vi32, masked_truncstore_s_vi32>;
+defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
+ truncstore_us_vi32, masked_truncstore_us_vi32>;
+
+defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc,
+ truncstorevi8, masked_truncstorevi8>;
+defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs,
+ truncstore_s_vi8, masked_truncstore_s_vi8>;
+defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
+ truncstore_us_vi8, masked_truncstore_us_vi8>;
+
+defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc,
+ truncstorevi16, masked_truncstorevi16>;
+defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs,
+ truncstore_s_vi16, masked_truncstore_s_vi16>;
+defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
+ truncstore_us_vi16, masked_truncstore_us_vi16>;
+
+defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc,
+ truncstorevi8, masked_truncstorevi8>;
+defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs,
+ truncstore_s_vi8, masked_truncstore_s_vi8>;
+defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
+ truncstore_us_vi8, masked_truncstore_us_vi8>;
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 1973684d2ab..c5689d7c698 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -1021,6 +1021,78 @@ def masked_truncstorevi32 :
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
+def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTMaskedStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTMaskedStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr),
+ (X86TruncSStore node:$val, node:$ptr), [{
+ return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def truncstore_us_vi8 : PatFrag<(ops node:$val, node:$ptr),
+ (X86TruncUSStore node:$val, node:$ptr), [{
+ return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def truncstore_s_vi16 : PatFrag<(ops node:$val, node:$ptr),
+ (X86TruncSStore node:$val, node:$ptr), [{
+ return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def truncstore_us_vi16 : PatFrag<(ops node:$val, node:$ptr),
+ (X86TruncUSStore node:$val, node:$ptr), [{
+ return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def truncstore_s_vi32 : PatFrag<(ops node:$val, node:$ptr),
+ (X86TruncSStore node:$val, node:$ptr), [{
+ return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def truncstore_us_vi32 : PatFrag<(ops node:$val, node:$ptr),
+ (X86TruncUSStore node:$val, node:$ptr), [{
+ return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def masked_truncstore_s_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def masked_truncstore_us_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def masked_truncstore_s_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def masked_truncstore_us_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def masked_truncstore_s_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
def assertzext_i1 :
PatFrag<(ops node:$src), (assertzext node:$src), [{
return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i1;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 0675f896911..8d58d2c229d 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -185,6 +185,79 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_128, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_256, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_512, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_128, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_256, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_512, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_128, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_256, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_512, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_128, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_256, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_512, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_128, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_256, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_512, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_128, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_256, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_512, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+
X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 6eedd264ada..2920125cbcf 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -2912,8 +2912,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqb %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsqb %zmm0, (%rdi)
; CHECK-NEXT: vpmovsqb %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -2946,8 +2946,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqb %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusqb %zmm0, (%rdi)
; CHECK-NEXT: vpmovusqb %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3014,8 +3014,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqw %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsqw %zmm0, (%rdi)
; CHECK-NEXT: vpmovsqw %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3048,8 +3048,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqw %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusqw %zmm0, (%rdi)
; CHECK-NEXT: vpmovusqw %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3116,8 +3116,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqd %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsqd %zmm0, (%rdi)
; CHECK-NEXT: vpmovsqd %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3150,8 +3150,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(i8* %ptr, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqd %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusqd %zmm0, (%rdi)
; CHECK-NEXT: vpmovusqd %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(i8* %ptr, <8 x i64> %x1, i8 -1)
@@ -3218,8 +3218,8 @@ declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32>, i16)
define void @test_int_x86_avx512_mask_pmovs_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsdb %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsdb %zmm0, (%rdi)
; CHECK-NEXT: vpmovsdb %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.pmovs.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
@@ -3252,8 +3252,8 @@ declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32>, i16)
define void @test_int_x86_avx512_mask_pmovus_db_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusdb %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusdb %zmm0, (%rdi)
; CHECK-NEXT: vpmovusdb %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.pmovus.db.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
@@ -3320,8 +3320,8 @@ declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32>, i16)
define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsdw %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovsdw %zmm0, (%rdi)
; CHECK-NEXT: vpmovsdw %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
@@ -3354,8 +3354,8 @@ declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32>, i16)
define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(i8* %ptr, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusdw %zmm0, (%rdi)
; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpmovusdw %zmm0, (%rdi)
; CHECK-NEXT: vpmovusdw %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(i8* %ptr, <16 x i32> %x1, i16 -1)
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index 05b59aff9b5..d9a9a2d655b 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -2010,18 +2010,17 @@ declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi)
; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi)
; AVX512BW-NEXT: vpmovswb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx)
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovswb %zmm0, (%ecx) {%k1}
+; AVX512F-32-NEXT: vpmovswb %zmm0, (%eax)
+; AVX512F-32-NEXT: vpmovswb %zmm0, (%eax) {%k1}
; AVX512F-32-NEXT: retl
call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
@@ -2063,18 +2062,17 @@ declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi)
; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi)
; AVX512BW-NEXT: vpmovuswb %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx)
-; AVX512F-32-NEXT: kmovd %eax, %k1
-; AVX512F-32-NEXT: vpmovuswb %zmm0, (%ecx) {%k1}
+; AVX512F-32-NEXT: vpmovuswb %zmm0, (%eax)
+; AVX512F-32-NEXT: vpmovuswb %zmm0, (%eax) {%k1}
; AVX512F-32-NEXT: retl
call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index 5eacf2279f2..3a56de61e5e 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -4090,8 +4090,8 @@ declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8)
define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07]
; CHECK-NEXT: vpmovswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
@@ -4124,8 +4124,8 @@ declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8)
define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07]
; CHECK-NEXT: vpmovuswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
@@ -4192,8 +4192,8 @@ declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16)
define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07]
; CHECK-NEXT: vpmovswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
@@ -4226,8 +4226,8 @@ declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16)
define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07]
; CHECK-NEXT: vpmovuswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index a99c8641d9c..90b9fc2192f 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -2040,8 +2040,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovs_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x22,0x07]
; CHECK-NEXT: vpmovsqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x22,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovs.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2074,8 +2074,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovus_qb_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusqb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x12,0x07]
; CHECK-NEXT: vpmovusqb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x12,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovus.qb.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2142,8 +2142,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovs_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qb_mem_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x22,0x07]
; CHECK-NEXT: vpmovsqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x22,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovs.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2176,8 +2176,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovus_qb_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qb_mem_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusqb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x12,0x07]
; CHECK-NEXT: vpmovusqb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x12,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovus.qb.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2244,8 +2244,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovs_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x24,0x07]
; CHECK-NEXT: vpmovsqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x24,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovs.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2278,8 +2278,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovus_qw_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusqw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x14,0x07]
; CHECK-NEXT: vpmovusqw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x14,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovus.qw.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2346,8 +2346,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovs_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qw_mem_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x24,0x07]
; CHECK-NEXT: vpmovsqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x24,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovs.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2380,8 +2380,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovus_qw_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qw_mem_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusqw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x14,0x07]
; CHECK-NEXT: vpmovusqw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x14,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovus.qw.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2448,8 +2448,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovs_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x25,0x07]
; CHECK-NEXT: vpmovsqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x25,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovs.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2482,8 +2482,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovus_qd_mem_128(i8* %ptr, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusqd %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x15,0x07]
; CHECK-NEXT: vpmovusqd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x15,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovus.qd.mem.128(i8* %ptr, <2 x i64> %x1, i8 -1)
@@ -2550,8 +2550,8 @@ declare void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovs_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_qd_mem_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x25,0x07]
; CHECK-NEXT: vpmovsqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x25,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovs.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2584,8 +2584,8 @@ declare void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64>, i8)
define void @test_int_x86_avx512_mask_pmovus_qd_mem_256(i8* %ptr, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_qd_mem_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusqd %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x15,0x07]
; CHECK-NEXT: vpmovusqd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x15,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovus.qd.mem.256(i8* %ptr, <4 x i64> %x1, i8 -1)
@@ -2652,8 +2652,8 @@ declare void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32>, i8)
define void @test_int_x86_avx512_mask_pmovs_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x21,0x07]
; CHECK-NEXT: vpmovsdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x21,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovs.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
@@ -2686,8 +2686,8 @@ declare void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32>, i8)
define void @test_int_x86_avx512_mask_pmovus_db_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusdb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x11,0x07]
; CHECK-NEXT: vpmovusdb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovus.db.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
@@ -2754,8 +2754,8 @@ declare void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32>, i8)
define void @test_int_x86_avx512_mask_pmovs_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_db_mem_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x21,0x07]
; CHECK-NEXT: vpmovsdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x21,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovs.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
@@ -2788,8 +2788,8 @@ declare void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32>, i8)
define void @test_int_x86_avx512_mask_pmovus_db_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_db_mem_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusdb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x11,0x07]
; CHECK-NEXT: vpmovusdb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x11,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovus.db.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
@@ -2856,8 +2856,8 @@ declare void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32>, i8)
define void @test_int_x86_avx512_mask_pmovs_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x23,0x07]
; CHECK-NEXT: vpmovsdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x23,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovs.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
@@ -2890,8 +2890,8 @@ declare void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32>, i8)
define void @test_int_x86_avx512_mask_pmovus_dw_mem_128(i8* %ptr, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_128:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusdw %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x13,0x07]
; CHECK-NEXT: vpmovusdw %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x13,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovus.dw.mem.128(i8* %ptr, <4 x i32> %x1, i8 -1)
@@ -2958,8 +2958,8 @@ declare void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32>, i8)
define void @test_int_x86_avx512_mask_pmovs_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_dw_mem_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovsdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovsdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x23,0x07]
; CHECK-NEXT: vpmovsdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x23,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovs.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
@@ -2992,8 +2992,8 @@ declare void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32>, i8)
define void @test_int_x86_avx512_mask_pmovus_dw_mem_256(i8* %ptr, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_dw_mem_256:
; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovusdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07]
; CHECK-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
+; CHECK-NEXT: vpmovusdw %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x13,0x07]
; CHECK-NEXT: vpmovusdw %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x13,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
call void @llvm.x86.avx512.mask.pmovus.dw.mem.256(i8* %ptr, <8 x i32> %x1, i8 -1)
OpenPOWER on IntegriCloud