| author | Craig Topper <craig.topper@intel.com> | 2019-01-28 07:03:03 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-01-28 07:03:03 +0000 |
| commit | 453150bc18d74c2ebbef8f780b3b9d2e6278a529 (patch) | |
| tree | fd42d6959c047c17a3faa85136cc3640d9a9fc46 | |
| parent | b23d5ccafc6db995486ec69925127c61faa5152f (diff) | |
| download | bcm5719-llvm-453150bc18d74c2ebbef8f780b3b9d2e6278a529.tar.gz bcm5719-llvm-453150bc18d74c2ebbef8f780b3b9d2e6278a529.zip | |
[X86] Add new variadic avx512 compress/expand intrinsics that use vXi1 types for the mask argument.
Remove and autoupgrade the old intrinsics.
llvm-svn: 352343
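In IR terms, the change replaces the old per-type intrinsics, which take a scalar integer mask, with one overloaded intrinsic per operation that takes a vXi1 mask; AutoUpgrade rewrites calls to the old names into the new form. Below is a minimal sketch distilled from the test updates in this diff (the wrapper function names `@old_form` and `@new_form` are illustrative, not part of the commit):

```llvm
declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double>, <8 x double>, i8)
declare <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double>, <8 x double>, <8 x i1>)

; Old form (removed and auto-upgraded): scalar i8 mask, one intrinsic per type and width.
define <8 x double> @old_form(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask)
  ret <8 x double> %res
}

; New form: a single overloaded intrinsic; the integer mask is bitcast to a <8 x i1> vector first.
define <8 x double> @new_form(<8 x double> %data, <8 x double> %passthru, i8 %mask) {
  %m = bitcast i8 %mask to <8 x i1>
  %res = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %m)
  ret <8 x double> %res
}
```

The bitcast-plus-call pattern is exactly what the rewritten tests in avx512-intrinsics.ll now exercise, while the *-upgrade.ll tests keep calling the old names to cover the AutoUpgrade path.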
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/include/llvm/IR/IntrinsicsX86.td | 160 |
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 23 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 5 |
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 72 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 360 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 137 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll | 176 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll | 154 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll | 356 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll | 136 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll | 748 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 296 |
12 files changed, 2058 insertions, 565 deletions
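Before the per-file diffs, it may help to see how the single overloaded TableGen definition shows up at the IR level. The declarations below are the 512-bit instantiations added at the end of the updated avx512-intrinsics.ll (copied from the diff further down); the suffix now names the overloaded value type rather than a separate element/width pair per intrinsic:

```llvm
declare <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double>, <8 x double>, <8 x i1>)
declare <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float>, <16 x float>, <16 x i1>)
declare <8 x i64>    @llvm.x86.avx512.mask.compress.v8i64(<8 x i64>, <8 x i64>, <8 x i1>)
declare <16 x i32>   @llvm.x86.avx512.mask.compress.v16i32(<16 x i32>, <16 x i32>, <16 x i1>)
declare <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double>, <8 x double>, <8 x i1>)
declare <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float>, <16 x float>, <16 x i1>)
declare <8 x i64>    @llvm.x86.avx512.mask.expand.v8i64(<8 x i64>, <8 x i64>, <8 x i1>)
declare <16 x i32>   @llvm.x86.avx512.mask.expand.v16i32(<16 x i32>, <16 x i32>, <16 x i1>)
```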
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 5f40a861ea9..a8c8cba56f4 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -4029,156 +4029,16 @@ let TargetPrefix = "x86" in { // Compress, Expand let TargetPrefix = "x86" in { - def int_x86_avx512_mask_compress_ps_512 : - GCCBuiltin<"__builtin_ia32_compresssf512_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_pd_512 : - GCCBuiltin<"__builtin_ia32_compressdf512_mask">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_ps_256 : - GCCBuiltin<"__builtin_ia32_compresssf256_mask">, - Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_pd_256 : - GCCBuiltin<"__builtin_ia32_compressdf256_mask">, - Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_ps_128 : - GCCBuiltin<"__builtin_ia32_compresssf128_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_pd_128 : - GCCBuiltin<"__builtin_ia32_compressdf128_mask">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_compress_d_512 : - GCCBuiltin<"__builtin_ia32_compresssi512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_q_512 : - GCCBuiltin<"__builtin_ia32_compressdi512_mask">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_d_256 : - GCCBuiltin<"__builtin_ia32_compresssi256_mask">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_q_256 : - GCCBuiltin<"__builtin_ia32_compressdi256_mask">, - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_d_128 : - GCCBuiltin<"__builtin_ia32_compresssi128_mask">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_q_128 : - GCCBuiltin<"__builtin_ia32_compressdi128_mask">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, - llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_compress_b_512 : - GCCBuiltin<"__builtin_ia32_compressqi512_mask">, - Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, - llvm_i64_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_w_512 : - GCCBuiltin<"__builtin_ia32_compresshi512_mask">, - Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_b_256 : - GCCBuiltin<"__builtin_ia32_compressqi256_mask">, - Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_w_256 : - GCCBuiltin<"__builtin_ia32_compresshi256_mask">, - Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_b_128 : - GCCBuiltin<"__builtin_ia32_compressqi128_mask">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_compress_w_128 : - 
GCCBuiltin<"__builtin_ia32_compresshi128_mask">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, - llvm_i8_ty], [IntrNoMem]>; - -// expand - def int_x86_avx512_mask_expand_ps_512 : - GCCBuiltin<"__builtin_ia32_expandsf512_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_pd_512 : - GCCBuiltin<"__builtin_ia32_expanddf512_mask">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_ps_256 : - GCCBuiltin<"__builtin_ia32_expandsf256_mask">, - Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_pd_256 : - GCCBuiltin<"__builtin_ia32_expanddf256_mask">, - Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_ps_128 : - GCCBuiltin<"__builtin_ia32_expandsf128_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_pd_128 : - GCCBuiltin<"__builtin_ia32_expanddf128_mask">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, - llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_expand_d_512 : - GCCBuiltin<"__builtin_ia32_expandsi512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_q_512 : - GCCBuiltin<"__builtin_ia32_expanddi512_mask">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_d_256 : - GCCBuiltin<"__builtin_ia32_expandsi256_mask">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_q_256 : - GCCBuiltin<"__builtin_ia32_expanddi256_mask">, - Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_d_128 : - GCCBuiltin<"__builtin_ia32_expandsi128_mask">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_q_128 : - GCCBuiltin<"__builtin_ia32_expanddi128_mask">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, - llvm_i8_ty], [IntrNoMem]>; - - def int_x86_avx512_mask_expand_b_512 : - GCCBuiltin<"__builtin_ia32_expandqi512_mask">, - Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, - llvm_i64_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_w_512 : - GCCBuiltin<"__builtin_ia32_expandhi512_mask">, - Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_b_256 : - GCCBuiltin<"__builtin_ia32_expandqi256_mask">, - Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty, - llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_w_256 : - GCCBuiltin<"__builtin_ia32_expandhi256_mask">, - Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_b_128 : - GCCBuiltin<"__builtin_ia32_expandqi128_mask">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, - llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_mask_expand_w_128 : - GCCBuiltin<"__builtin_ia32_expandhi128_mask">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, - llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_compress : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, 
llvm_i1_ty>], + [IntrNoMem]>; + def int_x86_avx512_mask_expand : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], + [IntrNoMem]>; } // truncate diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index fe1a4ffd0fa..396a425aef6 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -342,6 +342,16 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { Name.startswith("avx512.mask.load.") || // Added in 3.9 Name.startswith("avx512.mask.expand.load.") || // Added in 7.0 Name.startswith("avx512.mask.compress.store.") || // Added in 7.0 + Name.startswith("avx512.mask.expand.b") || // Added in 9.0 + Name.startswith("avx512.mask.expand.w") || // Added in 9.0 + Name.startswith("avx512.mask.expand.d") || // Added in 9.0 + Name.startswith("avx512.mask.expand.q") || // Added in 9.0 + Name.startswith("avx512.mask.expand.p") || // Added in 9.0 + Name.startswith("avx512.mask.compress.b") || // Added in 9.0 + Name.startswith("avx512.mask.compress.w") || // Added in 9.0 + Name.startswith("avx512.mask.compress.d") || // Added in 9.0 + Name.startswith("avx512.mask.compress.q") || // Added in 9.0 + Name.startswith("avx512.mask.compress.p") || // Added in 9.0 Name == "sse42.crc32.64.8" || // Added in 3.4 Name.startswith("avx.vbroadcast.s") || // Added in 3.5 Name.startswith("avx512.vbroadcast.s") || // Added in 7.0 @@ -2055,6 +2065,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Intrinsic::masked_compressstore, ResultTy); Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec }); + } else if (IsX86 && (Name.startswith("avx512.mask.compress.") || + Name.startswith("avx512.mask.expand."))) { + Type *ResultTy = CI->getType(); + + Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), + ResultTy->getVectorNumElements()); + + bool IsCompress = Name[12] == 'c'; + Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress + : Intrinsic::x86_avx512_mask_expand; + Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy); + Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1), + MaskVec }); } else if (IsX86 && Name.startswith("xop.vpcom")) { bool IsSigned; if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") || diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bca3e74b7d4..5047500a672 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -22037,12 +22037,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue Mask = Op.getOperand(3); SDValue DataToCompress = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); - if (isAllOnesConstant(Mask)) // return data as is + if (ISD::isBuildVectorAllOnes(Mask.getNode())) // return data as is return Op.getOperand(1); - MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); - Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); - // Avoid false dependency. 
if (PassThru.isUndef()) PassThru = DAG.getConstant(0, dl, VT); diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 5e83090702c..9e161b3266c 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -465,41 +465,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC, X86ISD::FSETCCM, X86ISD::FSETCCM_RND), - X86_INTRINSIC_DATA(avx512_mask_compress_b_128, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_b_256, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_b_512, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_d_512, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_pd_128, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_pd_256, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_pd_512, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_ps_128, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_ps_256, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_ps_512, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_q_128, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_q_256, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_w_128, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_w_256, COMPRESS_EXPAND_IN_REG, - X86ISD::COMPRESS, 0), - X86_INTRINSIC_DATA(avx512_mask_compress_w_512, COMPRESS_EXPAND_IN_REG, + X86_INTRINSIC_DATA(avx512_mask_compress, COMPRESS_EXPAND_IN_REG, X86ISD::COMPRESS, 0), X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, CVTPD2DQ_MASK, X86ISD::CVTP2SI, X86ISD::MCVTP2SI), @@ -607,41 +573,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FDIVS_RND, 0), X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FDIVS_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_b_128, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_b_256, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_b_512, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_d_512, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_pd_128, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_pd_256, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_pd_512, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_ps_128, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_ps_256, 
COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_ps_512, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_q_128, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_q_256, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_w_128, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_w_256, COMPRESS_EXPAND_IN_REG, - X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_expand_w_512, COMPRESS_EXPAND_IN_REG, + X86_INTRINSIC_DATA(avx512_mask_expand, COMPRESS_EXPAND_IN_REG, X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0), X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0), diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll index 716e050c5a8..b3d8e14696e 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -9782,3 +9782,363 @@ define <16 x float> @test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 %res2 = fadd <16 x float> %res, %res1 ret <16 x float> %res2 } + +define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) { +; X86-LABEL: test_mask_compress_pd_512: +; X86: ## %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vcompresspd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0xc1] +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_pd_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcompresspd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0xc1] +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask) + ret <8 x double> %res +} + +define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) { +; X86-LABEL: test_maskz_compress_pd_512: +; X86: ## %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_pd_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask) + ret <8 x double> %res +} + +define <8 x double> @test_compress_pd_512(<8 x double> %data) { +; CHECK-LABEL: test_compress_pd_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1) + ret <8 x double> %res +} + +declare <8 x double> 
@llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) + +define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) { +; X86-LABEL: test_mask_compress_ps_512: +; X86: ## %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcompressps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0xc1] +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_ps_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcompressps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0xc1] +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask) + ret <16 x float> %res +} + +define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) { +; X86-LABEL: test_maskz_compress_ps_512: +; X86: ## %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8a,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_ps_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8a,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask) + ret <16 x float> %res +} + +define <16 x float> @test_compress_ps_512(<16 x float> %data) { +; CHECK-LABEL: test_compress_ps_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1) + ret <16 x float> %res +} + +declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask) + +define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_mask_compress_q_512: +; X86: ## %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpcompressq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0xc1] +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_q_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpcompressq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0xc1] +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) { +; X86-LABEL: test_maskz_compress_q_512: +; X86: ## %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} ## encoding: 
[0x62,0xf2,0xfd,0xc9,0x8b,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_q_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8b,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_compress_q_512(<8 x i64> %data) { +; CHECK-LABEL: test_compress_q_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask) + +define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) { +; X86-LABEL: test_mask_compress_d_512: +; X86: ## %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpcompressd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0xc1] +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_d_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpcompressd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0xc1] +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) { +; X86-LABEL: test_maskz_compress_d_512: +; X86: ## %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8b,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_d_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x8b,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> @test_compress_d_512(<16 x i32> %data) { +; CHECK-LABEL: test_compress_d_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask) + +define <8 x double> @test_expand_pd_512(<8 x double> %data) { +; CHECK-LABEL: test_expand_pd_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1) + ret <8 x double> %res +} + +define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) { +; X86-LABEL: test_mask_expand_pd_512: +; X86: ## %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 ## encoding: 
[0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vexpandpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0xc8] +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_pd_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vexpandpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0xc8] +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask) + ret <8 x double> %res +} + +define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) { +; X86-LABEL: test_maskz_expand_pd_512: +; X86: ## %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_pd_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask) + ret <8 x double> %res +} + +declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) + +define <16 x float> @test_expand_ps_512(<16 x float> %data) { +; CHECK-LABEL: test_expand_ps_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1) + ret <16 x float> %res +} + +define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) { +; X86-LABEL: test_mask_expand_ps_512: +; X86: ## %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vexpandps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0xc8] +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_ps_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vexpandps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0xc8] +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask) + ret <16 x float> %res +} + +define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) { +; X86-LABEL: test_maskz_expand_ps_512: +; X86: ## %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_ps_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> 
%data, <16 x float> zeroinitializer, i16 %mask) + ret <16 x float> %res +} + +declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask) + +define <8 x i64> @test_expand_q_512(<8 x i64> %data) { +; CHECK-LABEL: test_expand_q_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1) + ret <8 x i64> %res +} + +define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_mask_expand_q_512: +; X86: ## %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpexpandq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0xc8] +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_q_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpexpandq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0xc8] +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) + ret <8 x i64> %res +} + +define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) { +; X86-LABEL: test_maskz_expand_q_512: +; X86: ## %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_q_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask) + +define <16 x i32> @test_expand_d_512(<16 x i32> %data) { +; CHECK-LABEL: test_expand_d_512: +; CHECK: ## %bb.0: +; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1) + ret <16 x i32> %res +} + +define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) { +; X86-LABEL: test_mask_expand_d_512: +; X86: ## %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpexpandd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0xc8] +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_d_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpexpandd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0xc8] +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) + ret <16 x i32> %res +} + +define <16 x i32> 
@test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) { +; X86-LABEL: test_maskz_expand_d_512: +; X86: ## %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0xc0] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_d_512: +; X64: ## %bb.0: +; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0xc0] +; X64-NEXT: retq ## encoding: [0xc3] + %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask) diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index e49709db3ec..ca5783f2137 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -9,8 +9,9 @@ define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> ; CHECK-NEXT: vcompresspd %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask) - ret <8 x double> %res + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %1) + ret <8 x double> %2 } define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) { @@ -19,20 +20,19 @@ define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) { ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcompresspd %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask) - ret <8 x double> %res + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> zeroinitializer, <8 x i1> %1) + ret <8 x double> %2 } define <8 x double> @test_compress_pd_512(<8 x double> %data) { ; CHECK-LABEL: test_compress_pd_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> undef, i8 -1) - ret <8 x double> %res + %1 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <8 x double> %1 } -declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) - define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) { ; CHECK-LABEL: test_mask_compress_ps_512: ; CHECK: ## %bb.0: @@ -40,8 +40,9 @@ define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> ; CHECK-NEXT: vcompressps %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask) - ret <16 x float> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> %passthru, <16 x i1> %1) + ret <16 x float> %2 } define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 
%mask) { @@ -50,20 +51,19 @@ define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) { ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vcompressps %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask) - ret <16 x float> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> zeroinitializer, <16 x i1> %1) + ret <16 x float> %2 } define <16 x float> @test_compress_ps_512(<16 x float> %data) { ; CHECK-LABEL: test_compress_ps_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> undef, i16 -1) - ret <16 x float> %res + %1 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <16 x float> %1 } -declare <16 x float> @llvm.x86.avx512.mask.compress.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask) - define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) { ; CHECK-LABEL: test_mask_compress_q_512: ; CHECK: ## %bb.0: @@ -71,8 +71,9 @@ define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, ; CHECK-NEXT: vpcompressq %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) - ret <8 x i64> %res + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> %passthru, <8 x i1> %1) + ret <8 x i64> %2 } define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) { @@ -81,20 +82,19 @@ define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) { ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask) - ret <8 x i64> %res + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> zeroinitializer, <8 x i1> %1) + ret <8 x i64> %2 } define <8 x i64> @test_compress_q_512(<8 x i64> %data) { ; CHECK-LABEL: test_compress_q_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1) - ret <8 x i64> %res + %1 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <8 x i64> %1 } -declare <8 x i64> @llvm.x86.avx512.mask.compress.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask) - define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) { ; CHECK-LABEL: test_mask_compress_d_512: ; CHECK: ## %bb.0: @@ -102,8 +102,9 @@ define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passth ; CHECK-NEXT: vpcompressd %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) - ret <16 x i32> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i32> 
@llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1) + ret <16 x i32> %2 } define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) { @@ -112,26 +113,25 @@ define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) { ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpcompressd %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask) - ret <16 x i32> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> zeroinitializer, <16 x i1> %1) + ret <16 x i32> %2 } define <16 x i32> @test_compress_d_512(<16 x i32> %data) { ; CHECK-LABEL: test_compress_d_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1) - ret <16 x i32> %res + %1 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <16 x i32> %1 } -declare <16 x i32> @llvm.x86.avx512.mask.compress.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask) - define <8 x double> @test_expand_pd_512(<8 x double> %data) { ; CHECK-LABEL: test_expand_pd_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> undef, i8 -1) - ret <8 x double> %res + %1 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <8 x double> %1 } define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) { @@ -141,8 +141,9 @@ define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %p ; CHECK-NEXT: vexpandpd %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %passthru, i8 %mask) - ret <8 x double> %res + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %1) + ret <8 x double> %2 } define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) { @@ -151,18 +152,17 @@ define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) { ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vexpandpd %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask) - ret <8 x double> %res + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> zeroinitializer, <8 x i1> %1) + ret <8 x double> %2 } -declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask) - define <16 x float> @test_expand_ps_512(<16 x float> %data) { ; CHECK-LABEL: test_expand_ps_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> undef, i16 -1) - ret <16 x float> %res + %1 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> undef, <16 x i1> <i1 true, i1 
true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <16 x float> %1 } define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) { @@ -172,8 +172,9 @@ define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %p ; CHECK-NEXT: vexpandps %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %passthru, i16 %mask) - ret <16 x float> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> %passthru, <16 x i1> %1) + ret <16 x float> %2 } define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) { @@ -182,18 +183,17 @@ define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) { ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vexpandps %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> zeroinitializer, i16 %mask) - ret <16 x float> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> zeroinitializer, <16 x i1> %1) + ret <16 x float> %2 } -declare <16 x float> @llvm.x86.avx512.mask.expand.ps.512(<16 x float> %data, <16 x float> %src0, i16 %mask) - define <8 x i64> @test_expand_q_512(<8 x i64> %data) { ; CHECK-LABEL: test_expand_q_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> undef, i8 -1) - ret <8 x i64> %res + %1 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <8 x i64> %1 } define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) { @@ -203,8 +203,9 @@ define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i ; CHECK-NEXT: vpexpandq %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) - ret <8 x i64> %res + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> %passthru, <8 x i1> %1) + ret <8 x i64> %2 } define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) { @@ -213,18 +214,17 @@ define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) { ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> zeroinitializer, i8 %mask) - ret <8 x i64> %res + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> zeroinitializer, <8 x i1> %1) + ret <8 x i64> %2 } -declare <8 x i64> @llvm.x86.avx512.mask.expand.q.512(<8 x i64> %data, <8 x i64> %src0, i8 %mask) - define <16 x i32> @test_expand_d_512(<16 x i32> %data) { ; CHECK-LABEL: test_expand_d_512: ; CHECK: ## %bb.0: ; CHECK-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> undef, i16 -1) - ret <16 x i32> %res + %1 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> undef, <16 x 
i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <16 x i32> %1 } define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) { @@ -234,8 +234,9 @@ define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru ; CHECK-NEXT: vpexpandd %zmm0, %zmm1 {%k1} ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) - ret <16 x i32> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1) + ret <16 x i32> %2 } define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) { @@ -244,12 +245,11 @@ define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) { ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpexpandd %zmm0, %zmm0 {%k1} {z} ; CHECK-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> zeroinitializer, i16 %mask) - ret <16 x i32> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> zeroinitializer, <16 x i1> %1) + ret <16 x i32> %2 } -declare <16 x i32> @llvm.x86.avx512.mask.expand.d.512(<16 x i32> %data, <16 x i32> %src0, i16 %mask) - define <16 x float> @test_rcp_ps_512(<16 x float> %a0) { ; CHECK-LABEL: test_rcp_ps_512: ; CHECK: ## %bb.0: @@ -5546,3 +5546,12 @@ entry: %3 = select <16 x i1> %2, <16 x float> %f, <16 x float> %e ret <16 x float> %3 } + +declare <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double>, <8 x double>, <8 x i1>) +declare <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float>, <16 x float>, <16 x i1>) +declare <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64>, <8 x i64>, <8 x i1>) +declare <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32>, <16 x i32>, <16 x i1>) +declare <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double>, <8 x double>, <8 x i1>) +declare <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float>, <16 x float>, <16 x i1>) +declare <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64>, <8 x i64>, <8 x i1>) +declare <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32>, <16 x i32>, <16 x i1>) diff --git a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll index 4816add5495..96eba89f6a4 100644 --- a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll @@ -55,6 +55,50 @@ define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data) { ret <32 x i16> %res } +define <32 x i16> @test_expand_w_512(<32 x i16> %data) { +; CHECK-LABEL: test_expand_w_512: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1) + ret <32 x i16> %res +} + +define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) { +; X86-LABEL: test_mask_expand_w_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8] +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X86-NEXT: retl # 
encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_w_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8] +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) { +; X86-LABEL: test_maskz_expand_w_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_w_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask) + define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) { ; X86-LABEL: test_mask_expand_load_b_512: ; X86: # %bb.0: @@ -108,6 +152,50 @@ define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data) { ret <64 x i8> %res } +define <64 x i8> @test_expand_b_512(<64 x i8> %data) { +; CHECK-LABEL: test_expand_b_512: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1) + ret <64 x i8> %res +} + +define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) { +; X86-LABEL: test_mask_expand_b_512: +; X86: # %bb.0: +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8] +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_b_512: +; X64: # %bb.0: +; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] +; X64-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8] +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) { +; X86-LABEL: test_maskz_expand_b_512: +; X86: # %bb.0: +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_b_512: +; X64: # %bb.0: +; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] +; X64-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +declare <64 x i8> 
@llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask) + define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) { ; X86-LABEL: test_mask_compress_store_w_512: ; X86: # %bb.0: @@ -148,6 +236,50 @@ define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) { ret void } +define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) { +; X86-LABEL: test_mask_compress_w_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1] +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_w_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1] +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) { +; X86-LABEL: test_maskz_compress_w_512: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_w_512: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask) + ret <32 x i16> %res +} + +define <32 x i16> @test_compress_w_512(<32 x i16> %data) { +; CHECK-LABEL: test_compress_w_512: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1) + ret <32 x i16> %res +} + +declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask) + define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) { ; X86-LABEL: test_mask_compress_store_b_512: ; X86: # %bb.0: @@ -188,6 +320,50 @@ define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) { ret void } +define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) { +; X86-LABEL: test_mask_compress_b_512: +; X86: # %bb.0: +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1] +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_b_512: +; X64: # %bb.0: +; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] +; X64-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1] +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) + ret <64 x 
i8> %res +} + +define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) { +; X86-LABEL: test_maskz_compress_b_512: +; X86: # %bb.0: +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_b_512: +; X64: # %bb.0: +; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] +; X64-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask) + ret <64 x i8> %res +} + +define <64 x i8> @test_compress_b_512(<64 x i8> %data) { +; CHECK-LABEL: test_compress_b_512: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1) + ret <64 x i8> %res +} + +declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask) + define <16 x i32>@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512: ; X86: # %bb.0: diff --git a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll index cf3f4f8a73f..a16b91b6218 100644 --- a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll @@ -62,8 +62,8 @@ define <32 x i16> @test_expand_w_512(<32 x i16> %data) { ; CHECK-LABEL: test_expand_w_512: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <32 x i16> %1 } define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) { @@ -80,8 +80,9 @@ define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru ; X64-NEXT: vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8] ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) - ret <32 x i16> %res + %1 = bitcast i32 %mask to <32 x i1> + %2 = call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1) + ret <32 x i16> %2 } define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) { @@ -96,12 +97,11 @@ define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) { ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask) - ret <32 x i16> %res + %1 = bitcast i32 %mask to <32 x i1> + %2 = 
call <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1) + ret <32 x i16> %2 } -declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask) - define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) { ; X86-LABEL: test_mask_expand_load_b_512: ; X86: # %bb.0: @@ -159,16 +159,14 @@ define <64 x i8> @test_expand_b_512(<64 x i8> %data) { ; CHECK-LABEL: test_expand_b_512: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1) - ret <64 x i8> %res + %1 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <64 x i8> %1 } define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) { ; X86-LABEL: test_mask_expand_b_512: ; X86: # %bb.0: -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8] +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8] ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] ; X86-NEXT: retl # encoding: [0xc3] @@ -179,16 +177,15 @@ define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i ; X64-NEXT: vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8] ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) - ret <64 x i8> %res + %1 = bitcast i64 %mask to <64 x i1> + %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1) + ret <64 x i8> %2 } define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) { ; X86-LABEL: test_maskz_expand_b_512: ; X86: # %bb.0: -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8] +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -197,12 +194,11 @@ define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) { ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] ; X64-NEXT: vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0] ; X64-NEXT: retq # encoding: [0xc3] 
- %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask) - ret <64 x i8> %res + %1 = bitcast i64 %mask to <64 x i1> + %2 = call <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1) + ret <64 x i8> %2 } -declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask) - define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) { ; X86-LABEL: test_mask_compress_store_w_512: ; X86: # %bb.0: @@ -224,6 +220,26 @@ define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %ma ret void } +define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) { +; X86-LABEL: test_compress_store_w_512: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] +; X86-NEXT: vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00] +; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_compress_store_w_512: +; X64: # %bb.0: +; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] +; X64-NEXT: vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07] +; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] +; X64-NEXT: retq # encoding: [0xc3] + %1 = bitcast i8* %addr to i16* + call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, i16* %1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret void +} + define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) { ; X86-LABEL: test_mask_compress_w_512: ; X86: # %bb.0: @@ -238,8 +254,9 @@ define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passth ; X64-NEXT: vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1] ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) - ret <32 x i16> %res + %1 = bitcast i32 %mask to <32 x i1> + %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> %passthru, <32 x i1> %1) + ret <32 x i16> %2 } define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) { @@ -254,66 +271,62 @@ define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) { ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask) - ret <32 x i16> %res + %1 = bitcast i32 %mask to <32 x i1> + %2 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> zeroinitializer, <32 x i1> %1) + ret <32 x i16> %2 } define <32 x i16> @test_compress_w_512(<32 x i16> %data) { ; CHECK-LABEL: test_compress_w_512: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 
x i16> undef, i32 -1) - ret <32 x i16> %res + %1 = call <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16> %data, <32 x i16> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <32 x i16> %1 } -declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask) - -define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) { -; X86-LABEL: test_compress_store_w_512: +define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) { +; X86-LABEL: test_mask_compress_store_b_512: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] -; X86-NEXT: vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00] +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; -; X64-LABEL: test_compress_store_w_512: +; X64-LABEL: test_mask_compress_store_b_512: ; X64: # %bb.0: -; X64-NEXT: kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8] -; X64-NEXT: vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07] +; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] +; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] - %1 = bitcast i8* %addr to i16* - call void @llvm.masked.compressstore.v32i16(<32 x i16> %data, i16* %1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + %1 = bitcast i64 %mask to <64 x i1> + call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> %1) ret void } -define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) { -; X86-LABEL: test_mask_compress_store_b_512: +define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) { +; X86-LABEL: test_compress_store_b_512: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8] ; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X86-NEXT: retl # encoding: [0xc3] ; -; X64-LABEL: test_mask_compress_store_b_512: +; X64-LABEL: test_compress_store_b_512: ; X64: # %bb.0: -; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] +; X64-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8] ; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] - %1 = bitcast i64 %mask to <64 x i1> - call void 
@llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> %1) + call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) ret void } define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) { ; X86-LABEL: test_mask_compress_b_512: ; X86: # %bb.0: -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8] +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1] ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] ; X86-NEXT: retl # encoding: [0xc3] @@ -324,16 +337,15 @@ define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, ; X64-NEXT: vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1] ; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) - ret <64 x i8> %res + %1 = bitcast i64 %mask to <64 x i1> + %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> %passthru, <64 x i1> %1) + ret <64 x i8> %2 } define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) { ; X86-LABEL: test_maskz_compress_b_512: ; X86: # %bb.0: -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8] +; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] ; X86-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; @@ -342,37 +354,17 @@ define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) { ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] ; X64-NEXT: vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask) - ret <64 x i8> %res + %1 = bitcast i64 %mask to <64 x i1> + %2 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> zeroinitializer, <64 x i1> %1) + ret <64 x i8> %2 } define <64 x i8> @test_compress_b_512(<64 x i8> %data) { ; CHECK-LABEL: test_compress_b_512: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1) - ret <64 x i8> %res -} - -declare <64 
x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask) - -define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) { -; X86-LABEL: test_compress_store_b_512: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8] -; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00] -; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_compress_store_b_512: -; X64: # %bb.0: -; X64-NEXT: kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8] -; X64-NEXT: vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07] -; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] -; X64-NEXT: retq # encoding: [0xc3] - call void @llvm.masked.compressstore.v64i8(<64 x i8> %data, i8* %addr, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) - ret void + %1 = call <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8> %data, <64 x i8> undef, <64 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <64 x i8> %1 } define <16 x i32> @test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) { @@ -761,3 +753,7 @@ declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>) declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>) declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>) declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>) +declare <32 x i16> @llvm.x86.avx512.mask.expand.v32i16(<32 x i16>, <32 x i16>, <32 x i1>) +declare <64 x i8> @llvm.x86.avx512.mask.expand.v64i8(<64 x i8>, <64 x i8>, <64 x i1>) +declare <32 x i16> @llvm.x86.avx512.mask.compress.v32i16(<32 x i16>, <32 x i16>, <32 x i1>) +declare <64 x i8> @llvm.x86.avx512.mask.compress.v64i8(<64 x i8>, <64 x i8>, <64 x i1>) diff --git a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll index 760212f746c..9c4ff050316 100644 --- a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-upgrade.ll @@ -57,6 +57,52 @@ define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data) { ret <8 x i16> %res } +define <8 x i16> @test_expand_w_128(<8 x i16> %data) { +; CHECK-LABEL: test_expand_w_128: +; CHECK: # %bb.0: +; 
CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1) + ret <8 x i16> %res +} + +define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) { +; X86-LABEL: test_mask_expand_w_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_w_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) { +; X86-LABEL: test_maskz_expand_w_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_w_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask) + define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) { ; X86-LABEL: test_mask_expand_load_b_128: ; X86: # %bb.0: @@ -110,6 +156,50 @@ define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data) { ret <16 x i8> %res } +define <16 x i8> @test_expand_b_128(<16 x i8> %data) { +; CHECK-LABEL: test_expand_b_128: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1) + ret <16 x i8> %res +} + +define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) { +; X86-LABEL: test_mask_expand_b_128: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_b_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, 
i16 %mask) { +; X86-LABEL: test_maskz_expand_b_128: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_b_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask) + define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) { ; X86-LABEL: test_mask_compress_store_w_128: ; X86: # %bb.0: @@ -147,6 +237,52 @@ define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data) { ret void } +define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) { +; X86-LABEL: test_mask_compress_w_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_w_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) { +; X86-LABEL: test_maskz_compress_w_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] +; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_w_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask) + ret <8 x i16> %res +} + +define <8 x i16> @test_compress_w_128(<8 x i16> %data) { +; CHECK-LABEL: test_compress_w_128: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1) + ret <8 x i16> %res +} + +declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask) + define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) { ; X86-LABEL: test_mask_compress_store_b_128: ; X86: # %bb.0: @@ -183,6 +319,50 @@ define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data) { ret void } +define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) { +; X86-LABEL: 
test_mask_compress_b_128: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_b_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) { +; X86-LABEL: test_maskz_compress_b_128: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_b_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask) + ret <16 x i8> %res +} + +define <16 x i8> @test_compress_b_128(<16 x i8> %data) { +; CHECK-LABEL: test_compress_b_128: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1) + ret <16 x i8> %res +} + +declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask) + define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) { ; X86-LABEL: test_mask_expand_load_w_256: ; X86: # %bb.0: @@ -236,6 +416,50 @@ define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data) { ret <16 x i16> %res } +define <16 x i16> @test_expand_w_256(<16 x i16> %data) { +; CHECK-LABEL: test_expand_w_256: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1) + ret <16 x i16> %res +} + +define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) { +; X86-LABEL: test_mask_expand_w_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_w_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) { +; X86-LABEL: test_maskz_expand_w_256: +; 
X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_w_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask) + define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) { ; X86-LABEL: test_mask_expand_load_b_256: ; X86: # %bb.0: @@ -289,6 +513,50 @@ define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data) { ret <32 x i8> %res } +define <32 x i8> @test_expand_b_256(<32 x i8> %data) { +; CHECK-LABEL: test_expand_b_256: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) { +; X86-LABEL: test_mask_expand_b_256: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_b_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) { +; X86-LABEL: test_maskz_expand_b_256: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_b_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask) + define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) { ; X86-LABEL: test_mask_compress_store_w_256: ; X86: # %bb.0: @@ -329,6 +597,50 @@ define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data) { ret void } +define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) { +; X86-LABEL: test_mask_compress_w_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpcompressw %ymm0, %ymm1 
{%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_w_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) { +; X86-LABEL: test_maskz_compress_w_256: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_w_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask) + ret <16 x i16> %res +} + +define <16 x i16> @test_compress_w_256(<16 x i16> %data) { +; CHECK-LABEL: test_compress_w_256: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1) + ret <16 x i16> %res +} + +declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask) + define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) { ; X86-LABEL: test_mask_compress_store_b_256: ; X86: # %bb.0: @@ -369,6 +681,50 @@ define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data) { ret void } +define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) { +; X86-LABEL: test_mask_compress_b_256: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_b_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) { +; X86-LABEL: test_maskz_compress_b_256: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_b_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: 
[0x62,0xf2,0x7d,0xa9,0x63,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask) + ret <32 x i8> %res +} + +define <32 x i8> @test_compress_b_256(<32 x i8> %data) { +; CHECK-LABEL: test_compress_b_256: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1) + ret <32 x i8> %res +} + +declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask) + define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128: ; X86: # %bb.0: diff --git a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll index 163164d1f1d..371499c476e 100644 --- a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll @@ -64,8 +64,8 @@ define <8 x i16> @test_expand_w_128(<8 x i16> %data) { ; CHECK-LABEL: test_expand_w_128: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1) - ret <8 x i16> %res + %1 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <8 x i16> %1 } define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) { @@ -83,8 +83,9 @@ define <8 x i16> @test_mask_expand_w_128(<8 x i16> %data, <8 x i16> %passthru, i ; X64-NEXT: vpexpandw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x62,0xc8] ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) - ret <8 x i16> %res + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1) + ret <8 x i16> %2 } define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) { @@ -100,12 +101,11 @@ define <8 x i16> @test_maskz_expand_w_128(<8 x i16> %data, i8 %mask) { ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x62,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask) - ret <8 x i16> %res + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1) + ret <8 x i16> %2 } -declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask) - define <16 x i8> @test_mask_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) { ; X86-LABEL: test_mask_expand_load_b_128: ; X86: # %bb.0: @@ -163,8 +163,8 @@ define <16 x i8> @test_expand_b_128(<16 x i8> %data) { ; CHECK-LABEL: test_expand_b_128: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1) - ret <16 x i8> %res + %1 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, 
i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <16 x i8> %1 } define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) { @@ -181,8 +181,9 @@ define <16 x i8> @test_mask_expand_b_128(<16 x i8> %data, <16 x i8> %passthru, i ; X64-NEXT: vpexpandb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x62,0xc8] ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) - ret <16 x i8> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1) + ret <16 x i8> %2 } define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) { @@ -197,12 +198,11 @@ define <16 x i8> @test_maskz_expand_b_128(<16 x i8> %data, i16 %mask) { ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpexpandb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x62,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask) - ret <16 x i8> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1) + ret <16 x i8> %2 } -declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask) - define void @test_mask_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) { ; X86-LABEL: test_mask_compress_store_w_128: ; X86: # %bb.0: @@ -256,8 +256,9 @@ define <8 x i16> @test_mask_compress_w_128(<8 x i16> %data, <8 x i16> %passthru, ; X64-NEXT: vpcompressw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x63,0xc1] ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %passthru, i8 %mask) - ret <8 x i16> %res + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> %passthru, <8 x i1> %1) + ret <8 x i16> %2 } define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) { @@ -273,20 +274,19 @@ define <8 x i16> @test_maskz_compress_w_128(<8 x i16> %data, i8 %mask) { ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x63,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask) - ret <8 x i16> %res + %1 = bitcast i8 %mask to <8 x i1> + %2 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> zeroinitializer, <8 x i1> %1) + ret <8 x i16> %2 } define <8 x i16> @test_compress_w_128(<8 x i16> %data) { ; CHECK-LABEL: test_compress_w_128: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> undef, i8 -1) - ret <8 x i16> %res + %1 = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16> %data, <8 x i16> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <8 x i16> %1 } -declare <8 x i16> 
@llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> %src0, i8 %mask) - define void @test_mask_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) { ; X86-LABEL: test_mask_compress_store_b_128: ; X86: # %bb.0: @@ -336,8 +336,9 @@ define <16 x i8> @test_mask_compress_b_128(<16 x i8> %data, <16 x i8> %passthru, ; X64-NEXT: vpcompressb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x63,0xc1] ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %passthru, i16 %mask) - ret <16 x i8> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> %passthru, <16 x i1> %1) + ret <16 x i8> %2 } define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) { @@ -352,20 +353,19 @@ define <16 x i8> @test_maskz_compress_b_128(<16 x i8> %data, i16 %mask) { ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpcompressb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x63,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask) - ret <16 x i8> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> zeroinitializer, <16 x i1> %1) + ret <16 x i8> %2 } define <16 x i8> @test_compress_b_128(<16 x i8> %data) { ; CHECK-LABEL: test_compress_b_128: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> undef, i16 -1) - ret <16 x i8> %res + %1 = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8> %data, <16 x i8> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <16 x i8> %1 } -declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> %src0, i16 %mask) - define <16 x i16> @test_mask_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) { ; X86-LABEL: test_mask_expand_load_w_256: ; X86: # %bb.0: @@ -426,8 +426,8 @@ define <16 x i16> @test_expand_w_256(<16 x i16> %data) { ; CHECK-LABEL: test_expand_w_256: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1) - ret <16 x i16> %res + %1 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <16 x i16> %1 } define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) { @@ -444,8 +444,9 @@ define <16 x i16> @test_mask_expand_w_256(<16 x i16> %data, <16 x i16> %passthru ; X64-NEXT: vpexpandw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x62,0xc8] ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) - ret <16 x i16> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x 
i16> %passthru, <16 x i1> %1) + ret <16 x i16> %2 } define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) { @@ -460,12 +461,11 @@ define <16 x i16> @test_maskz_expand_w_256(<16 x i16> %data, i16 %mask) { ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpexpandw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x62,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask) - ret <16 x i16> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1) + ret <16 x i16> %2 } -declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask) - define <32 x i8> @test_mask_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) { ; X86-LABEL: test_mask_expand_load_b_256: ; X86: # %bb.0: @@ -523,8 +523,8 @@ define <32 x i8> @test_expand_b_256(<32 x i8> %data) { ; CHECK-LABEL: test_expand_b_256: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1) - ret <32 x i8> %res + %1 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <32 x i8> %1 } define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) { @@ -541,8 +541,9 @@ define <32 x i8> @test_mask_expand_b_256(<32 x i8> %data, <32 x i8> %passthru, i ; X64-NEXT: vpexpandb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x62,0xc8] ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) - ret <32 x i8> %res + %1 = bitcast i32 %mask to <32 x i1> + %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1) + ret <32 x i8> %2 } define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) { @@ -557,12 +558,11 @@ define <32 x i8> @test_maskz_expand_b_256(<32 x i8> %data, i32 %mask) { ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpexpandb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x62,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask) - ret <32 x i8> %res + %1 = bitcast i32 %mask to <32 x i1> + %2 = call <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1) + ret <32 x i8> %2 } -declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask) - define void @test_mask_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) { ; X86-LABEL: test_mask_compress_store_w_256: ; X86: # %bb.0: @@ -618,8 +618,9 @@ define <16 x i16> @test_mask_compress_w_256(<16 x i16> %data, <16 x i16> %passth ; X64-NEXT: vpcompressw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x63,0xc1] ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %passthru, i16 %mask) - ret <16 x i16> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> %passthru, <16 x i1> %1) + ret <16 x i16> %2 } define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) { @@ -634,20 +635,19 @@ define <16 x i16> @test_maskz_compress_w_256(<16 x i16> %data, i16 %mask) { ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpcompressw %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x63,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> zeroinitializer, i16 %mask) - ret <16 x i16> %res + %1 = bitcast i16 %mask to <16 x i1> + %2 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> zeroinitializer, <16 x i1> %1) + ret <16 x i16> %2 } define <16 x i16> @test_compress_w_256(<16 x i16> %data) { ; CHECK-LABEL: test_compress_w_256: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> undef, i16 -1) - ret <16 x i16> %res + %1 = call <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16> %data, <16 x i16> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <16 x i16> %1 } -declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src0, i16 %mask) - define void @test_mask_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) { ; X86-LABEL: test_mask_compress_store_b_256: ; X86: # %bb.0: @@ -701,8 +701,9 @@ define <32 x i8> @test_mask_compress_b_256(<32 x i8> %data, <32 x i8> %passthru, ; X64-NEXT: vpcompressb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x63,0xc1] ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %passthru, i32 %mask) - ret <32 x i8> %res + %1 = bitcast i32 %mask to <32 x i1> + %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> %passthru, <32 x i1> %1) + ret <32 x i8> %2 } define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) { @@ -717,20 +718,19 @@ define <32 x i8> @test_maskz_compress_b_256(<32 x i8> %data, i32 %mask) { ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] ; X64-NEXT: vpcompressb %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x63,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> zeroinitializer, i32 %mask) - ret <32 x i8> %res + %1 = bitcast i32 %mask to <32 x i1> + %2 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> zeroinitializer, <32 x i1> %1) + ret <32 x i8> %2 } define <32 x i8> @test_compress_b_256(<32 x i8> %data) { ; CHECK-LABEL: test_compress_b_256: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> undef, i32 -1) - ret <32 x i8> %res + %1 = call <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8> %data, <32 x i8> undef, <32 x i1> <i1 true, i1 
true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>) + ret <32 x i8> %1 } -declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src0, i32 %mask) - define <4 x i32> @test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_128: ; X86: # %bb.0: @@ -1551,3 +1551,11 @@ declare <16 x i16> @llvm.masked.expandload.v16i16(i16*, <16 x i1>, <16 x i16>) declare <32 x i8> @llvm.masked.expandload.v32i8(i8*, <32 x i1>, <32 x i8>) declare void @llvm.masked.compressstore.v16i16(<16 x i16>, i16*, <16 x i1>) declare void @llvm.masked.compressstore.v32i8(<32 x i8>, i8*, <32 x i1>) +declare <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16>, <8 x i16>, <8 x i1>) +declare <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8>, <16 x i8>, <16 x i1>) +declare <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x i16>, <8 x i16>, <8 x i1>) +declare <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x i8>, <16 x i8>, <16 x i1>) +declare <16 x i16> @llvm.x86.avx512.mask.expand.v16i16(<16 x i16>, <16 x i16>, <16 x i1>) +declare <32 x i8> @llvm.x86.avx512.mask.expand.v32i8(<32 x i8>, <32 x i8>, <32 x i1>) +declare <16 x i16> @llvm.x86.avx512.mask.compress.v16i16(<16 x i16>, <16 x i16>, <16 x i1>) +declare <32 x i8> @llvm.x86.avx512.mask.compress.v32i8(<32 x i8>, <32 x i8>, <32 x i1>) diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index 1e46a41d59b..36a63224e20 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -14578,10 +14578,746 @@ define <4 x i32>@test_int_x86_avx512_mask_pmov_qd_256(<4 x i64> %x0, <4 x i32> % ; X64-NEXT: vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0] ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; X64-NEXT: retq # encoding: [0xc3] - %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) - %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) - %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) - %res3 = add <4 x i32> %res0, %res1 - %res4 = add <4 x i32> %res3, %res2 - ret <4 x i32> %res4 + %res0 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 -1) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> %x1, i8 %x2) + %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmov.qd.256(<4 x i64> %x0, <4 x i32> zeroinitializer, i8 %x2) + %res3 = add <4 x i32> %res0, %res1 + %res4 = add <4 x i32> %res3, %res2 + ret <4 x i32> %res4 +} + +define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) { +; X86-LABEL: test_mask_compress_pd_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_pd_128: 
+; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask) + ret <2 x double> %res +} + +define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) { +; X86-LABEL: test_maskz_compress_pd_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_pd_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask) + ret <2 x double> %res +} + +define <2 x double> @test_compress_pd_128(<2 x double> %data) { +; CHECK-LABEL: test_compress_pd_128: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> undef, i8 -1) + ret <2 x double> %res +} + +declare <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask) + +define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) { +; X86-LABEL: test_mask_compress_ps_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_ps_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask) + ret <4 x float> %res +} + +define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) { +; X86-LABEL: test_maskz_compress_ps_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_ps_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask) + ret <4 x float> %res +} + +define <4 x float> 
@test_compress_ps_128(<4 x float> %data) { +; CHECK-LABEL: test_compress_ps_128: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> undef, i8 -1) + ret <4 x float> %res +} + +declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask) + +define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_mask_compress_q_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_q_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) { +; X86-LABEL: test_maskz_compress_q_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_q_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_compress_q_128(<2 x i64> %data) { +; CHECK-LABEL: test_compress_q_128: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask) + +define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) { +; X86-LABEL: test_mask_compress_d_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_d_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) + ret <4 x i32> %res 
+} + +define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) { +; X86-LABEL: test_maskz_compress_d_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_d_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_compress_d_128(<4 x i32> %data) { +; CHECK-LABEL: test_compress_d_128: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1) + ret <4 x i32> %res +} + +declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask) + +define <2 x double> @test_expand_pd_128(<2 x double> %data) { +; CHECK-LABEL: test_expand_pd_128: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> undef, i8 -1) + ret <2 x double> %res +} + +define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) { +; X86-LABEL: test_mask_expand_pd_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_pd_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask) + ret <2 x double> %res +} + +define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) { +; X86-LABEL: test_maskz_expand_pd_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_pd_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask) + ret <2 x double> %res +} + +declare <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask) + +define <4 x float> @test_expand_ps_128(<4 x float> %data) { +; CHECK-LABEL: test_expand_ps_128: +; CHECK: # %bb.0: +; CHECK-NEXT: 
ret{{[l|q]}} # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> undef, i8 -1) + ret <4 x float> %res +} + +define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) { +; X86-LABEL: test_mask_expand_ps_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_ps_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask) + ret <4 x float> %res +} + +define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) { +; X86-LABEL: test_maskz_expand_ps_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_ps_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask) + ret <4 x float> %res +} + +declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask) + +define <2 x i64> @test_expand_q_128(<2 x i64> %data) { +; CHECK-LABEL: test_expand_q_128: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1) + ret <2 x i64> %res +} + +define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_mask_expand_q_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_q_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) + ret <2 x i64> %res +} + +define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) { +; X86-LABEL: test_maskz_expand_q_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: 
kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_q_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask) + +define <4 x i32> @test_expand_d_128(<4 x i32> %data) { +; CHECK-LABEL: test_expand_d_128: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1) + ret <4 x i32> %res +} + +define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) { +; X86-LABEL: test_mask_expand_d_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_d_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) + ret <4 x i32> %res +} + +define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) { +; X86-LABEL: test_maskz_expand_d_128: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_d_128: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask) + ret <4 x i32> %res } + +declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask) + +define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) { +; X86-LABEL: test_mask_compress_pd_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_pd_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1] +; 
X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask) + ret <4 x double> %res +} + +define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) { +; X86-LABEL: test_maskz_compress_pd_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_pd_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask) + ret <4 x double> %res +} + +define <4 x double> @test_compress_pd_256(<4 x double> %data) { +; CHECK-LABEL: test_compress_pd_256: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> undef, i8 -1) + ret <4 x double> %res +} + +declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask) + +define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) { +; X86-LABEL: test_mask_compress_ps_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_ps_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask) + ret <8 x float> %res +} + +define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) { +; X86-LABEL: test_maskz_compress_ps_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_ps_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask) + ret <8 x float> %res +} + +define <8 x float> @test_compress_ps_256(<8 x float> %data) { +; CHECK-LABEL: test_compress_ps_256: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <8 x float> 
@llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> undef, i8 -1) + ret <8 x float> %res +} + +declare <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask) + +define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_mask_compress_q_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_q_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) + ret <4 x i64> %res +} + +define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) { +; X86-LABEL: test_maskz_compress_q_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_q_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask) + ret <4 x i64> %res +} + +define <4 x i64> @test_compress_q_256(<4 x i64> %data) { +; CHECK-LABEL: test_compress_q_256: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask) + +define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) { +; X86-LABEL: test_mask_compress_d_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_compress_d_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) { +; X86-LABEL: test_maskz_compress_d_256: +; X86: # %bb.0: +; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_compress_d_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_compress_d_256(<8 x i32> %data) { +; CHECK-LABEL: test_compress_d_256: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask) + +define <4 x double> @test_expand_pd_256(<4 x double> %data) { +; CHECK-LABEL: test_expand_pd_256: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> undef, i8 -1) + ret <4 x double> %res +} + +define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) { +; X86-LABEL: test_mask_expand_pd_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_pd_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask) + ret <4 x double> %res +} + +define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) { +; X86-LABEL: test_maskz_expand_pd_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_pd_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask) + ret <4 x double> %res +} + +declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask) + +define <8 x float> @test_expand_ps_256(<8 x float> %data) { +; CHECK-LABEL: test_expand_ps_256: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> undef, i8 -1) + ret <8 x float> 
%res +} + +define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) { +; X86-LABEL: test_mask_expand_ps_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_ps_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask) + ret <8 x float> %res +} + +define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) { +; X86-LABEL: test_maskz_expand_ps_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_ps_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask) + ret <8 x float> %res +} + +declare <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask) + +define <4 x i64> @test_expand_q_256(<4 x i64> %data) { +; CHECK-LABEL: test_expand_q_256: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1) + ret <4 x i64> %res +} + +define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) { +; X86-LABEL: test_mask_expand_q_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_q_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) + ret <4 x i64> %res +} + +define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) { +; X86-LABEL: test_maskz_expand_q_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0] +; X86-NEXT: retl 
# encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_q_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask) + +define <8 x i32> @test_expand_d_256(<8 x i32> %data) { +; CHECK-LABEL: test_expand_d_256: +; CHECK: # %bb.0: +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1) + ret <8 x i32> %res +} + +define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) { +; X86-LABEL: test_mask_expand_d_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_mask_expand_d_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) + ret <8 x i32> %res +} + +define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) { +; X86-LABEL: test_maskz_expand_d_256: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8] +; X86-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_maskz_expand_d_256: +; X64: # %bb.0: +; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] +; X64-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0] +; X64-NEXT: retq # encoding: [0xc3] + %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask) diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index c275f4b739c..b5dfa2f11ad 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -17,8 +17,10 @@ define <2 x double> @test_mask_compress_pd_128(<2 x double> %data, <2 x double> ; X64-NEXT: vcompresspd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0xc1] ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask) - ret <2 x double> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> + %2 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x 
double> %passthru, <2 x i1> %extract) + ret <2 x double> %2 } define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) { @@ -34,20 +36,20 @@ define <2 x double> @test_maskz_compress_pd_128(<2 x double> %data, i8 %mask) { ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vcompresspd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8a,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask) - ret <2 x double> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> + %2 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> zeroinitializer, <2 x i1> %extract) + ret <2 x double> %2 } define <2 x double> @test_compress_pd_128(<2 x double> %data) { ; CHECK-LABEL: test_compress_pd_128: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> undef, i8 -1) - ret <2 x double> %res + %1 = call <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double> %data, <2 x double> undef, <2 x i1> <i1 true, i1 true>) + ret <2 x double> %1 } -declare <2 x double> @llvm.x86.avx512.mask.compress.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask) - define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) { ; X86-LABEL: test_mask_compress_ps_128: ; X86: # %bb.0: @@ -63,8 +65,10 @@ define <4 x float> @test_mask_compress_ps_128(<4 x float> %data, <4 x float> %pa ; X64-NEXT: vcompressps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0xc1] ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask) - ret <4 x float> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> %passthru, <4 x i1> %extract) + ret <4 x float> %2 } define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) { @@ -80,20 +84,20 @@ define <4 x float> @test_maskz_compress_ps_128(<4 x float> %data, i8 %mask) { ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vcompressps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8a,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask) - ret <4 x float> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> zeroinitializer, <4 x i1> %extract) + ret <4 x float> %2 } define <4 x float> @test_compress_ps_128(<4 x float> %data) { ; CHECK-LABEL: test_compress_ps_128: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> undef, i8 -1) - ret <4 x float> %res + %1 = call <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float> %data, <4 x float> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) + ret <4 x float> %1 } 
-declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask) - define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) { ; X86-LABEL: test_mask_compress_q_128: ; X86: # %bb.0: @@ -109,8 +113,10 @@ define <2 x i64> @test_mask_compress_q_128(<2 x i64> %data, <2 x i64> %passthru, ; X64-NEXT: vpcompressq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0xc1] ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) - ret <2 x i64> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> + %2 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> %passthru, <2 x i1> %extract) + ret <2 x i64> %2 } define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) { @@ -126,20 +132,20 @@ define <2 x i64> @test_maskz_compress_q_128(<2 x i64> %data, i8 %mask) { ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vpcompressq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x8b,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask) - ret <2 x i64> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> + %2 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> zeroinitializer, <2 x i1> %extract) + ret <2 x i64> %2 } define <2 x i64> @test_compress_q_128(<2 x i64> %data) { ; CHECK-LABEL: test_compress_q_128: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1) - ret <2 x i64> %res + %1 = call <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64> %data, <2 x i64> undef, <2 x i1> <i1 true, i1 true>) + ret <2 x i64> %1 } -declare <2 x i64> @llvm.x86.avx512.mask.compress.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask) - define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) { ; X86-LABEL: test_mask_compress_d_128: ; X86: # %bb.0: @@ -155,8 +161,10 @@ define <4 x i32> @test_mask_compress_d_128(<4 x i32> %data, <4 x i32> %passthru, ; X64-NEXT: vpcompressd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0xc1] ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) - ret <4 x i32> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> %passthru, <4 x i1> %extract) + ret <4 x i32> %2 } define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) { @@ -172,26 +180,26 @@ define <4 x i32> @test_maskz_compress_d_128(<4 x i32> %data, i8 %mask) { ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vpcompressd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 
%mask) - ret <4 x i32> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> zeroinitializer, <4 x i1> %extract) + ret <4 x i32> %2 } define <4 x i32> @test_compress_d_128(<4 x i32> %data) { ; CHECK-LABEL: test_compress_d_128: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1) - ret <4 x i32> %res + %1 = call <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32> %data, <4 x i32> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) + ret <4 x i32> %1 } -declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask) - define <2 x double> @test_expand_pd_128(<2 x double> %data) { ; CHECK-LABEL: test_expand_pd_128: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> undef, i8 -1) - ret <2 x double> %res + %1 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> undef, <2 x i1> <i1 true, i1 true>) + ret <2 x double> %1 } define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %passthru, i8 %mask) { @@ -209,8 +217,10 @@ define <2 x double> @test_mask_expand_pd_128(<2 x double> %data, <2 x double> %p ; X64-NEXT: vexpandpd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0xc8] ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %passthru, i8 %mask) - ret <2 x double> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> + %2 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> %passthru, <2 x i1> %extract) + ret <2 x double> %2 } define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) { @@ -226,18 +236,18 @@ define <2 x double> @test_maskz_expand_pd_128(<2 x double> %data, i8 %mask) { ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vexpandpd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> zeroinitializer, i8 %mask) - ret <2 x double> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> + %2 = call <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double> %data, <2 x double> zeroinitializer, <2 x i1> %extract) + ret <2 x double> %2 } -declare <2 x double> @llvm.x86.avx512.mask.expand.pd.128(<2 x double> %data, <2 x double> %src0, i8 %mask) - define <4 x float> @test_expand_ps_128(<4 x float> %data) { ; CHECK-LABEL: test_expand_ps_128: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> undef, i8 -1) - ret <4 x float> %res + %1 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) + ret <4 x float> %1 } define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %passthru, i8 %mask) { @@ 
-255,8 +265,10 @@ define <4 x float> @test_mask_expand_ps_128(<4 x float> %data, <4 x float> %pass ; X64-NEXT: vexpandps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0xc8] ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %passthru, i8 %mask) - ret <4 x float> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> %passthru, <4 x i1> %extract) + ret <4 x float> %2 } define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) { @@ -272,18 +284,18 @@ define <4 x float> @test_maskz_expand_ps_128(<4 x float> %data, i8 %mask) { ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vexpandps %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> zeroinitializer, i8 %mask) - ret <4 x float> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = call <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float> %data, <4 x float> zeroinitializer, <4 x i1> %extract) + ret <4 x float> %2 } -declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask) - define <2 x i64> @test_expand_q_128(<2 x i64> %data) { ; CHECK-LABEL: test_expand_q_128: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> undef, i8 -1) - ret <2 x i64> %res + %1 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> undef, <2 x i1> <i1 true, i1 true>) + ret <2 x i64> %1 } define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) { @@ -301,8 +313,10 @@ define <2 x i64> @test_mask_expand_q_128(<2 x i64> %data, <2 x i64> %passthru, i ; X64-NEXT: vpexpandq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0xc8] ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %passthru, i8 %mask) - ret <2 x i64> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> + %2 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> %passthru, <2 x i1> %extract) + ret <2 x i64> %2 } define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) { @@ -318,18 +332,18 @@ define <2 x i64> @test_maskz_expand_q_128(<2 x i64> %data, i8 %mask) { ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vpexpandq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> zeroinitializer, i8 %mask) - ret <2 x i64> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <2 x i32> <i32 0, i32 1> + %2 = call <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64> %data, <2 x i64> zeroinitializer, <2 x i1> %extract) + ret <2 x i64> %2 } -declare 
<2 x i64> @llvm.x86.avx512.mask.expand.q.128(<2 x i64> %data, <2 x i64> %src0, i8 %mask) - define <4 x i32> @test_expand_d_128(<4 x i32> %data) { ; CHECK-LABEL: test_expand_d_128: ; CHECK: # %bb.0: ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> undef, i8 -1) - ret <4 x i32> %res + %1 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) + ret <4 x i32> %1 } define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) { @@ -347,8 +361,10 @@ define <4 x i32> @test_mask_expand_d_128(<4 x i32> %data, <4 x i32> %passthru, i ; X64-NEXT: vpexpandd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0xc8] ; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %passthru, i8 %mask) - ret <4 x i32> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> %passthru, <4 x i1> %extract) + ret <4 x i32> %2 } define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) { @@ -364,12 +380,12 @@ define <4 x i32> @test_maskz_expand_d_128(<4 x i32> %data, i8 %mask) { ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vpexpandd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> zeroinitializer, i8 %mask) - ret <4 x i32> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = call <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32> %data, <4 x i32> zeroinitializer, <4 x i1> %extract) + ret <4 x i32> %2 } -declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask) - define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) { ; X86-LABEL: test_mask_compress_pd_256: ; X86: # %bb.0: @@ -385,8 +401,10 @@ define <4 x double> @test_mask_compress_pd_256(<4 x double> %data, <4 x double> ; X64-NEXT: vcompresspd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1] ; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask) - ret <4 x double> %res + %1 = bitcast i8 %mask to <8 x i1> + %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %2 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> %passthru, <4 x i1> %extract) + ret <4 x double> %2 } define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) { @@ -402,20 +420,20 @@ define <4 x double> @test_maskz_compress_pd_256(<4 x double> %data, i8 %mask) { ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf] ; X64-NEXT: vcompresspd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> zeroinitializer, 
i8 %mask)
- ret <4 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> zeroinitializer, <4 x i1> %extract)
+ ret <4 x double> %2
}
define <4 x double> @test_compress_pd_256(<4 x double> %data) {
; CHECK-LABEL: test_compress_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
- ret <4 x double> %res
+ %1 = call <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double> %data, <4 x double> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret <4 x double> %1
}
-declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
-
define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_ps_256:
; X86: # %bb.0:
@@ -431,8 +449,9 @@ define <8 x float> @test_mask_compress_ps_256(<8 x float> %data, <8 x float> %pa
; X64-NEXT: vcompressps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
- ret <8 x float> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> %passthru, <8 x i1> %1)
+ ret <8 x float> %2
}
define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) {
@@ -448,20 +467,19 @@ define <8 x float> @test_maskz_compress_ps_256(<8 x float> %data, i8 %mask) {
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vcompressps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8a,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
- ret <8 x float> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> zeroinitializer, <8 x i1> %1)
+ ret <8 x float> %2
}
define <8 x float> @test_compress_ps_256(<8 x float> %data) {
; CHECK-LABEL: test_compress_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
- ret <8 x float> %res
+ %1 = call <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float> %data, <8 x float> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x float> %1
}
-declare <8 x float> @llvm.x86.avx512.mask.compress.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
-
define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_q_256:
; X86: # %bb.0:
@@ -477,8 +495,10 @@ define <4 x i64> @test_mask_compress_q_256(<4 x i64> %data, <4 x i64> %passthru,
; X64-NEXT: vpcompressq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
- ret <4 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> %passthru, <4 x i1> %extract)
+ ret <4 x i64> %2
}
define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) {
@@ -494,20 +514,20 @@ define <4 x i64> @test_maskz_compress_q_256(<4 x i64> %data, i8 %mask) {
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcompressq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
- ret <4 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> zeroinitializer, <4 x i1> %extract)
+ ret <4 x i64> %2
}
define <4 x i64> @test_compress_q_256(<4 x i64> %data) {
; CHECK-LABEL: test_compress_q_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
- ret <4 x i64> %res
+ %1 = call <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64> %data, <4 x i64> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret <4 x i64> %1
}
-declare <4 x i64> @llvm.x86.avx512.mask.compress.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
-
define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
; X86-LABEL: test_mask_compress_d_256:
; X86: # %bb.0:
@@ -523,8 +543,9 @@ define <8 x i32> @test_mask_compress_d_256(<8 x i32> %data, <8 x i32> %passthru,
; X64-NEXT: vpcompressd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0xc1]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
- ret <8 x i32> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> %passthru, <8 x i1> %1)
+ ret <8 x i32> %2
}
define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) {
@@ -540,26 +561,25 @@ define <8 x i32> @test_maskz_compress_d_256(<8 x i32> %data, i8 %mask) {
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpcompressd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
- ret <8 x i32> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> zeroinitializer, <8 x i1> %1)
+ ret <8 x i32> %2
}
define <8 x i32> @test_compress_d_256(<8 x i32> %data) {
; CHECK-LABEL: test_compress_d_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
- ret <8 x i32> %res
+ %1 = call <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32> %data, <8 x i32> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x i32> %1
}
-declare <8 x i32> @llvm.x86.avx512.mask.compress.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
-
define <4 x double> @test_expand_pd_256(<4 x double> %data) {
; CHECK-LABEL: test_expand_pd_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> undef, i8 -1)
- ret <4 x double> %res
+ %1 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret <4 x double> %1
}
define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %passthru, i8 %mask) {
@@ -577,8 +597,10 @@ define <4 x double> @test_mask_expand_pd_256(<4 x double> %data, <4 x double> %p
; X64-NEXT: vexpandpd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %passthru, i8 %mask)
- ret <4 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> %passthru, <4 x i1> %extract)
+ ret <4 x double> %2
}
define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
@@ -594,18 +616,18 @@ define <4 x double> @test_maskz_expand_pd_256(<4 x double> %data, i8 %mask) {
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> zeroinitializer, i8 %mask)
- ret <4 x double> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double> %data, <4 x double> zeroinitializer, <4 x i1> %extract)
+ ret <4 x double> %2
}
-declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
-
define <8 x float> @test_expand_ps_256(<8 x float> %data) {
; CHECK-LABEL: test_expand_ps_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> undef, i8 -1)
- ret <8 x float> %res
+ %1 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x float> %1
}
define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %passthru, i8 %mask) {
@@ -623,8 +645,9 @@ define <8 x float> @test_mask_expand_ps_256(<8 x float> %data, <8 x float> %pass
; X64-NEXT: vexpandps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %passthru, i8 %mask)
- ret <8 x float> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> %passthru, <8 x i1> %1)
+ ret <8 x float> %2
}
define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
@@ -640,18 +663,17 @@ define <8 x float> @test_maskz_expand_ps_256(<8 x float> %data, i8 %mask) {
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vexpandps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> zeroinitializer, i8 %mask)
- ret <8 x float> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float> %data, <8 x float> zeroinitializer, <8 x i1> %1)
+ ret <8 x float> %2
}
-declare <8 x float> @llvm.x86.avx512.mask.expand.ps.256(<8 x float> %data, <8 x float> %src0, i8 %mask)
-
define <4 x i64> @test_expand_q_256(<4 x i64> %data) {
; CHECK-LABEL: test_expand_q_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> undef, i8 -1)
- ret <4 x i64> %res
+ %1 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+ ret <4 x i64> %1
}
define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
@@ -669,8 +691,10 @@ define <4 x i64> @test_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i
; X64-NEXT: vpexpandq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
- ret <4 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> %passthru, <4 x i1> %extract)
+ ret <4 x i64> %2
}
define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
@@ -686,18 +710,18 @@ define <4 x i64> @test_maskz_expand_q_256(<4 x i64> %data, i8 %mask) {
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandq %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> zeroinitializer, i8 %mask)
- ret <4 x i64> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> zeroinitializer, <4 x i1> %extract)
+ ret <4 x i64> %2
}
-declare <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %src0, i8 %mask)
-
define <8 x i32> @test_expand_d_256(<8 x i32> %data) {
; CHECK-LABEL: test_expand_d_256:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> undef, i8 -1)
- ret <8 x i32> %res
+ %1 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x i32> %1
}
define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask) {
@@ -715,8 +739,9 @@ define <8 x i32> @test_mask_expand_d_256(<8 x i32> %data, <8 x i32> %passthru, i
; X64-NEXT: vpexpandd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0xc8]
; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %passthru, i8 %mask)
- ret <8 x i32> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> %passthru, <8 x i1> %1)
+ ret <8 x i32> %2
}
define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
@@ -732,12 +757,11 @@ define <8 x i32> @test_maskz_expand_d_256(<8 x i32> %data, i8 %mask) {
; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vpexpandd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0xc0]
; X64-NEXT: retq # encoding: [0xc3]
- %res = call <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> zeroinitializer, i8 %mask)
- ret <8 x i32> %res
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32> %data, <8 x i32> zeroinitializer, <8 x i1> %1)
+ ret <8 x i32> %2
}
-declare <8 x i32> @llvm.x86.avx512.mask.expand.d.256(<8 x i32> %data, <8 x i32> %src0, i8 %mask)
-
define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_cmpps_256:
; CHECK: # %bb.0:
@@ -6915,3 +6939,19 @@ declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.avx512.mask.compress.v2f64(<2 x double>, <2 x double>, <2 x i1>)
+declare <4 x float> @llvm.x86.avx512.mask.compress.v4f32(<4 x float>, <4 x float>, <4 x i1>)
+declare <2 x i64> @llvm.x86.avx512.mask.compress.v2i64(<2 x i64>, <2 x i64>, <2 x i1>)
+declare <4 x i32> @llvm.x86.avx512.mask.compress.v4i32(<4 x i32>, <4 x i32>, <4 x i1>)
+declare <2 x double> @llvm.x86.avx512.mask.expand.v2f64(<2 x double>, <2 x double>, <2 x i1>)
+declare <4 x float> @llvm.x86.avx512.mask.expand.v4f32(<4 x float>, <4 x float>, <4 x i1>)
+declare <2 x i64> @llvm.x86.avx512.mask.expand.v2i64(<2 x i64>, <2 x i64>, <2 x i1>)
+declare <4 x i32> @llvm.x86.avx512.mask.expand.v4i32(<4 x i32>, <4 x i32>, <4 x i1>)
+declare <4 x double> @llvm.x86.avx512.mask.compress.v4f64(<4 x double>, <4 x double>, <4 x i1>)
+declare <8 x float> @llvm.x86.avx512.mask.compress.v8f32(<8 x float>, <8 x float>, <8 x i1>)
+declare <4 x i64> @llvm.x86.avx512.mask.compress.v4i64(<4 x i64>, <4 x i64>, <4 x i1>)
+declare <8 x i32> @llvm.x86.avx512.mask.compress.v8i32(<8 x i32>, <8 x i32>, <8 x i1>)
+declare <4 x double> @llvm.x86.avx512.mask.expand.v4f64(<4 x double>, <4 x double>, <4 x i1>)
+declare <8 x float> @llvm.x86.avx512.mask.expand.v8f32(<8 x float>, <8 x float>, <8 x i1>)
+declare <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64>, <4 x i64>, <4 x i1>)
+declare <8 x i32> @llvm.x86.avx512.mask.expand.v8i32(<8 x i32>, <8 x i32>, <8 x i1>)
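For readers skimming the test churn above, the pattern the upgraded tests rely on is compact; the sketch below restates it outside the FileCheck scaffolding. The intrinsic declaration is taken verbatim from the diff, while the @upgrade_sketch_mask_expand_q_256 wrapper name is illustrative and not part of the commit: the old i8 mask argument is bitcast to <8 x i1>, and for the 4-element variants the low four bits are extracted with a shufflevector before calling the new vXi1 intrinsic.

; Old form (removed by this commit, rewritten by the upgrade path):
;   %res = call <4 x i64> @llvm.x86.avx512.mask.expand.q.256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask)
; New form:
define <4 x i64> @upgrade_sketch_mask_expand_q_256(<4 x i64> %data, <4 x i64> %passthru, i8 %mask) {
  ; turn the scalar mask into a vector of bits, then keep the low 4 lanes
  %m = bitcast i8 %mask to <8 x i1>
  %m4 = shufflevector <8 x i1> %m, <8 x i1> %m, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %r = call <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64> %data, <4 x i64> %passthru, <4 x i1> %m4)
  ret <4 x i64> %r
}
declare <4 x i64> @llvm.x86.avx512.mask.expand.v4i64(<4 x i64>, <4 x i64>, <4 x i1>)

The 8-element (v8f32/v8i32) variants use the <8 x i1> bitcast directly, with no shufflevector, as the tests above show.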

