summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLuo, Yuanke <yuanke.luo@intel.com>2019-05-06 08:22:37 +0000
committerLuo, Yuanke <yuanke.luo@intel.com>2019-05-06 08:22:37 +0000
commitbeec41c656e7d716fd5755cce12e4934fdced267 (patch)
treecbb53258bcd3f11adc2ee6a8467def1692b30623
parentfb607580046ed9fe2891151a23375f0c524d29b3 (diff)
downloadbcm5719-llvm-beec41c656e7d716fd5755cce12e4934fdced267.tar.gz
bcm5719-llvm-beec41c656e7d716fd5755cce12e4934fdced267.zip
Enable AVX512_BF16 instructions, which are supported for BFLOAT16 in Cooper Lake
Summary: 1. Enable infrastructure of AVX512_BF16, which is supported for BFLOAT16 in Cooper Lake; 2. Enable VCVTNE2PS2BF16, VCVTNEPS2BF16 and DPBF16PS instructions, which are Vector Neural Network Instructions supporting BFLOAT16 inputs and conversion instructions from IEEE single precision. VCVTNE2PS2BF16: Convert Two Packed Single Data to One Packed BF16 Data. VCVTNEPS2BF16: Convert Packed Single Data to Packed BF16 Data. VDPBF16PS: Dot Product of BF16 Pairs Accumulated into Packed Single Precision. For more details about BF16 isa, please refer to the latest ISE document: https://software.intel.com/en-us/download/intel-architecture-instruction-set-extensions-programming-reference Author: LiuTianle Reviewers: craig.topper, smaslov, LuoYuanke, wxiao3, annita.zhang, RKSimon, spatel Reviewed By: craig.topper Subscribers: kristina, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D60550 llvm-svn: 360017
-rw-r--r--llvm/include/llvm/IR/IntrinsicsX86.td38
-rw-r--r--llvm/lib/Support/Host.cpp3
-rw-r--r--llvm/lib/Target/X86/X86.td3
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp19
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h13
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td140
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td19
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td1
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h11
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.h4
-rw-r--r--test/CodeGen/X86/avx512bf16-intrinsics.ll160
-rw-r--r--test/CodeGen/X86/avx512bf16-vl-intrinsics.ll358
-rw-r--r--test/MC/Disassembler/X86/avx512bf16-att.txt82
-rw-r--r--test/MC/Disassembler/X86/avx512bf16-intel.txt82
-rw-r--r--test/MC/Disassembler/X86/avx512bf16vl-att.txt157
-rw-r--r--test/MC/Disassembler/X86/avx512bf16vl-intel.txt157
-rw-r--r--test/MC/Disassembler/X86/x86-64-avx512bf16-att.txt82
-rw-r--r--test/MC/Disassembler/X86/x86-64-avx512bf16-intel.txt83
-rw-r--r--test/MC/Disassembler/X86/x86-64-avx512bf16vl-att.txt158
-rw-r--r--test/MC/Disassembler/X86/x86-64-avx512bf16vl-intel.txt158
-rw-r--r--test/MC/X86/avx512_bf16-encoding.s90
-rw-r--r--test/MC/X86/avx512_bf16_vl-encoding.s170
-rw-r--r--test/MC/X86/intel-syntax-avx512_bf16.s90
-rw-r--r--test/MC/X86/intel-syntax-avx512_bf16_vl.s170
-rw-r--r--test/MC/X86/intel-syntax-x86-64-avx512_bf16.s90
-rw-r--r--test/MC/X86/intel-syntax-x86-64-avx512_bf16_vl.s178
-rw-r--r--test/MC/X86/x86-64-avx512_bf16-encoding.s90
-rw-r--r--test/MC/X86/x86-64-avx512_bf16_vl-encoding.s178
28 files changed, 2784 insertions, 0 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 06b603a788b..2635e3d8648 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -4834,3 +4834,41 @@ let TargetPrefix = "x86" in {
def int_x86_invpcid : GCCBuiltin<"__builtin_ia32_invpcid">,
Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty], []>;
}
+
+let TargetPrefix = "x86" in {
+ def int_x86_avx512bf16_cvtne2ps2bf16_128:
+ GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512bf16_cvtne2ps2bf16_256:
+ GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512bf16_cvtne2ps2bf16_512:
+ GCCBuiltin<"__builtin_ia32_cvtne2ps2bf16_512">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v16f32_ty, llvm_v16f32_ty],
+ [IntrNoMem]>;
+ // Intrinsic must be masked due to it producing less than 128 bits of results.
+ def int_x86_avx512bf16_mask_cvtneps2bf16_128:
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v4f32_ty, llvm_v8i16_ty, llvm_v4i1_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512bf16_cvtneps2bf16_256:
+ GCCBuiltin<"__builtin_ia32_cvtneps2bf16_256">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx512bf16_cvtneps2bf16_512:
+ GCCBuiltin<"__builtin_ia32_cvtneps2bf16_512">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty], [IntrNoMem]>;
+ def int_x86_avx512bf16_dpbf16ps_128:
+ GCCBuiltin<"__builtin_ia32_dpbf16ps_128">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx512bf16_dpbf16ps_256:
+ GCCBuiltin<"__builtin_ia32_dpbf16ps_256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx512bf16_dpbf16ps_512:
+ GCCBuiltin<"__builtin_ia32_dpbf16ps_512">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
+}
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index 69362704687..4a7eff3f6e3 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -1375,6 +1375,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
// detecting features using the "-march=native" flag.
// For more info, see X86 ISA docs.
Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
+ bool HasLeaf7Subleaf1 =
+ MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+ Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
bool HasLeafD = MaxLevel >= 0xd &&
!getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index fe23a2900d5..a799c1fda49 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -167,6 +167,9 @@ def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
"Enable AVX-512 Vector Neural Network Instructions",
[FeatureAVX512]>;
+def FeatureBF16 : SubtargetFeature<"avx512bf16", "HasBF16", "true",
+ "Support bfloat16 floating point",
+ [FeatureBWI]>;
def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
"Enable AVX-512 Bit Algorithms",
[FeatureBWI]>;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 053fe90d1f4..fc100fe4871 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -22624,6 +22624,21 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
PassThru, Mask);
}
+ case CVTNEPS2BF16_MASK: {
+ SDValue Src = Op.getOperand(1);
+ SDValue PassThru = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()))
+ return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src);
+
+ // Break false dependency.
+ if (PassThru.isUndef())
+ PassThru = DAG.getConstant(0, dl, PassThru.getValueType());
+
+ return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru,
+ Mask);
+ }
default:
break;
}
@@ -28073,6 +28088,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CVTS2UI: return "X86ISD::CVTS2UI";
case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND";
case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
+ case X86ISD::CVTNE2PS2BF16: return "X86ISD::CVTNE2PS2BF16";
+ case X86ISD::CVTNEPS2BF16: return "X86ISD::CVTNEPS2BF16";
+ case X86ISD::MCVTNEPS2BF16: return "X86ISD::MCVTNEPS2BF16";
+ case X86ISD::DPBF16PS: return "X86ISD::DPBF16PS";
case X86ISD::LWPINS: return "X86ISD::LWPINS";
case X86ISD::MGATHER: return "X86ISD::MGATHER";
case X86ISD::MSCATTER: return "X86ISD::MSCATTER";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 78e414b9fe8..9b2f059ae6b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -509,6 +509,19 @@ namespace llvm {
MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
MCVTSI2P, MCVTUI2P,
+ // Vector float to bfloat16.
+ // Convert TWO packed single data to one packed BF16 data
+ CVTNE2PS2BF16,
+ // Convert packed single data to packed BF16 data
+ CVTNEPS2BF16,
+ // Masked version of above.
+ // SRC, PASSTHRU, MASK
+ MCVTNEPS2BF16,
+
+ // Dot product of BF16 pairs to accumulated into
+ // packed single precision.
+ DPBF16PS,
+
// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index d857b8ef1dc..d0d255b6a7f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12647,3 +12647,143 @@ defm VP4DPWSSDSrm : AVX512_maskable_3src_in_asm<0x53, MRMSrcMem, v16i32_info,
Sched<[SchedWriteFMA.ZMM.Folded]>;
}
+multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched,
+ AVX512VLVectorVTInfo _SrcVTInfo,
+ AVX512VLVectorVTInfo _DstVTInfo,
+ SDNode OpNode, Predicate prd,
+ bit IsCommutable = 0> {
+ let Predicates = [prd] in
+ defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
+ _SrcVTInfo.info512, _DstVTInfo.info512,
+ _SrcVTInfo.info512, IsCommutable>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+ let Predicates = [HasVLX, prd] in {
+ defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
+ _SrcVTInfo.info256, _DstVTInfo.info256,
+ _SrcVTInfo.info256, IsCommutable>,
+ EVEX_V256, EVEX_CD8<32, CD8VF>;
+ defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
+ _SrcVTInfo.info128, _DstVTInfo.info128,
+ _SrcVTInfo.info128, IsCommutable>,
+ EVEX_V128, EVEX_CD8<32, CD8VF>;
+ }
+}
+
+defm VCVTNE2PS2BF16 : avx512_binop_all2<0x72, "vcvtne2ps2bf16",
+ SchedWriteCvtPD2PS, //FIXME: Shoulod be SchedWriteCvtPS2BF
+ avx512vl_f32_info, avx512vl_i16_info,
+ X86cvtne2ps2bf16, HasBF16, 0>, T8XD;
+
+// Truncate Float to BFloat16
+multiclass avx512_cvtps2bf16<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasBF16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i16x_info, v16f32_info,
+ X86cvtneps2bf16, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasBF16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v4f32x_info,
+ null_frag, sched.XMM, "{1to4}", "{x}", f128mem,
+ VK4WM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i16x_info, v8f32x_info,
+ X86cvtneps2bf16,
+ sched.YMM, "{1to8}", "{y}">, EVEX_V256;
+
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0>;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst,
+ f128mem:$src), 0, "intel">;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0>;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst,
+ f256mem:$src), 0, "intel">;
+ }
+}
+
+defm VCVTNEPS2BF16 : avx512_cvtps2bf16<0x72, "vcvtneps2bf16",
+ SchedWriteCvtPD2PS>, T8XS,
+ EVEX_CD8<32, CD8VF>;
+
+let Predicates = [HasBF16, HasVLX] in {
+ // Special patterns to allow use of X86mcvtneps2bf16 for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32 VR128X:$src))),
+ (VCVTNEPS2BF16Z128rr VR128X:$src)>;
+ def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), (v8i16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTNEPS2BF16Z128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src)>;
+ def : Pat<(X86mcvtneps2bf16 (v4f32 VR128X:$src), v8i16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTNEPS2BF16Z128rrkz VK4WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v8i16 (X86cvtneps2bf16 (loadv4f32 addr:$src))),
+ (VCVTNEPS2BF16Z128rm addr:$src)>;
+ def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), (v8i16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTNEPS2BF16Z128rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtneps2bf16 (loadv4f32 addr:$src), v8i16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTNEPS2BF16Z128rmkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8i16 (X86cvtneps2bf16 (v4f32
+ (X86VBroadcast (loadf32 addr:$src))))),
+ (VCVTNEPS2BF16Z128rmb addr:$src)>;
+ def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ (v8i16 VR128X:$src0), VK4WM:$mask),
+ (VCVTNEPS2BF16Z128rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86mcvtneps2bf16 (v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ v8i16x_info.ImmAllZerosV, VK4WM:$mask),
+ (VCVTNEPS2BF16Z128rmbkz VK4WM:$mask, addr:$src)>;
+}
+
+let Constraints = "$src1 = $dst" in {
+multiclass avx512_dpbf16ps_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, X86VectorVTInfo src_v> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ EVEX_4V;
+
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+ (src_v.VT (bitconvert
+ (src_v.LdFrag addr:$src3)))))>, EVEX_4V;
+
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr,
+ !strconcat("${src3}", _.BroadcastStr,", $src2"),
+ !strconcat("$src2, ${src3}", _.BroadcastStr),
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2,
+ (src_v.VT (X86VBroadcast(src_v.ScalarLdFrag addr:$src3)))))>,
+ EVEX_B, EVEX_4V;
+
+}
+} // Constraints = "$src1 = $dst"
+
+multiclass avx512_dpbf16ps_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo _,
+ AVX512VLVectorVTInfo src_v, Predicate prd> {
+ let Predicates = [prd] in {
+ defm Z : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info512,
+ src_v.info512>, EVEX_V512;
+ }
+ let Predicates = [HasVLX, prd] in {
+ defm Z256 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info256,
+ src_v.info256>, EVEX_V256;
+ defm Z128 : avx512_dpbf16ps_rm<opc, OpcodeStr, OpNode, _.info128,
+ src_v.info128>, EVEX_V128;
+ }
+}
+
+defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps,
+ avx512vl_f32_info, avx512vl_i32_info,
+ HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 4d4d5faccdd..d79959e6455 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -664,6 +664,25 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
SDTCisOpSmallerThanOp<0, 1>,
SDTCisVT<2, i32>]>>;
+// cvt fp to bfloat16
+def X86cvtne2ps2bf16 : SDNode<"X86ISD::CVTNE2PS2BF16",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisSameAs<1,2>]>>;
+def X86mcvtneps2bf16 : SDNode<"X86ISD::MCVTNEPS2BF16",
+ SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>,
+ SDTCisSameAs<0, 2>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<1, 3>]>>;
+def X86cvtneps2bf16 : SDNode<"X86ISD::CVTNEPS2BF16",
+ SDTypeProfile<1, 1, [SDTCVecEltisVT<0, i16>,
+ SDTCVecEltisVT<1, f32>]>>;
+def X86dpbf16ps : SDNode<"X86ISD::DPBF16PS",
+ SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
+ SDTCisSameAs<0,1>,
+ SDTCVecEltisVT<2, i32>,
+ SDTCisSameAs<2,3>]>>;
+
// galois field arithmetic
def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>;
def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 0176c2d707a..56bc0500078 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -835,6 +835,7 @@ def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
def PKU : Predicate<"Subtarget->hasPKU()">;
def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
+def HasBF16 : Predicate<"Subtarget->hasBF16()">;
def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 54168762c7a..40141d89462 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -19,6 +19,7 @@
namespace llvm {
enum IntrinsicType : uint16_t {
+ CVTNEPS2BF16_MASK,
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
INTR_TYPE_3OP_IMM8,
@@ -981,6 +982,16 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_vpshufbitqmb_128, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
X86_INTRINSIC_DATA(avx512_vpshufbitqmb_256, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
X86_INTRINSIC_DATA(avx512_vpshufbitqmb_512, INTR_TYPE_2OP, X86ISD::VPSHUFBITQMB, 0),
+ // bfloat16
+ X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_128, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0),
+ X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_256, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0),
+ X86_INTRINSIC_DATA(avx512bf16_cvtne2ps2bf16_512, INTR_TYPE_2OP, X86ISD::CVTNE2PS2BF16, 0),
+ X86_INTRINSIC_DATA(avx512bf16_cvtneps2bf16_256, INTR_TYPE_1OP, X86ISD::CVTNEPS2BF16, 0),
+ X86_INTRINSIC_DATA(avx512bf16_cvtneps2bf16_512, INTR_TYPE_1OP, X86ISD::CVTNEPS2BF16, 0),
+ X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_128, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
+ X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_256, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
+ X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_512, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
+ X86_INTRINSIC_DATA(avx512bf16_mask_cvtneps2bf16_128, CVTNEPS2BF16_MASK, X86ISD::CVTNEPS2BF16, X86ISD::MCVTNEPS2BF16),
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0),
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 0ff9d544d82..3b11bb12f62 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -353,6 +353,9 @@ protected:
/// Processor has AVX-512 Vector Neural Network Instructions
bool HasVNNI = false;
+ /// Processor has AVX-512 bfloat16 floating-point extensions
+ bool HasBF16 = false;
+
/// Processor has AVX-512 Bit Algorithms instructions
bool HasBITALG = false;
@@ -668,6 +671,7 @@ public:
bool hasVLX() const { return HasVLX; }
bool hasPKU() const { return HasPKU; }
bool hasVNNI() const { return HasVNNI; }
+ bool hasBF16() const { return HasBF16; }
bool hasBITALG() const { return HasBITALG; }
bool hasMPX() const { return HasMPX; }
bool hasSHSTK() const { return HasSHSTK; }
diff --git a/test/CodeGen/X86/avx512bf16-intrinsics.ll b/test/CodeGen/X86/avx512bf16-intrinsics.ll
new file mode 100644
index 00000000000..7b64c57237d
--- /dev/null
+++ b/test/CodeGen/X86/avx512bf16-intrinsics.ll
@@ -0,0 +1,160 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bf16 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bf16 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+
+declare <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float>, <16 x float>) #3
+
+define <8 x i64> @test_mm512_cvtne2ps2bf16_512(<16 x float> %A, <16 x float> %B) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm512_cvtne2ps2bf16_512:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0x72,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %A, <16 x float> %B) #4
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @test_mm512_maskz_cvtne2ps2bf16_512(<16 x float> %A, <16 x float> %B, i32 %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm512_maskz_cvtne2ps2bf16_512:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0xc9,0x72,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_maskz_cvtne2ps2bf16_512:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0xc9,0x72,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %A, <16 x float> %B) #4
+ %1 = bitcast i32 %U to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x i16> %0, <32 x i16> zeroinitializer
+ %3 = bitcast <32 x i16> %2 to <8 x i64>
+ ret <8 x i64> %3
+}
+
+define <8 x i64> @test_mm512_mask_cvtne2ps2bf16_512(<8 x i64> %C, i32 %U, <16 x float> %A, <16 x float> %B) local_unnamed_addr #2 {
+; X86-LABEL: test_mm512_mask_cvtne2ps2bf16_512:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtne2ps2bf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x49,0x72,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_mask_cvtne2ps2bf16_512:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtne2ps2bf16 %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x49,0x72,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %A, <16 x float> %B) #4
+ %1 = bitcast <8 x i64> %C to <32 x i16>
+ %2 = bitcast i32 %U to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %0, <32 x i16> %1
+ %4 = bitcast <32 x i16> %3 to <8 x i64>
+ ret <8 x i64> %4
+}
+
+declare <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float>) #3
+
+define <4 x i64> @test_mm512_cvtneps2bf16_512(<16 x float> %A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm512_cvtneps2bf16_512:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtneps2bf16 %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x72,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %A) #4
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm512_maskz_cvtneps2bf16_512(<16 x float> %A, i16 %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm512_maskz_cvtneps2bf16_512:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw 4(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtneps2bf16 %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x72,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_maskz_cvtneps2bf16_512:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtneps2bf16 %zmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x72,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %A) #4
+ %1 = bitcast i16 %U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer
+ %3 = bitcast <16 x i16> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @test_mm512_mask_cvtneps2bf16_512(<4 x i64> %C, i16 %U, <16 x float> %A) local_unnamed_addr #2 {
+; X86-LABEL: test_mm512_mask_cvtneps2bf16_512:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw 4(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtneps2bf16 %zmm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x72,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_mask_cvtneps2bf16_512:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtneps2bf16 %zmm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x72,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.512(<16 x float> %A) #4
+ %1 = bitcast <4 x i64> %C to <16 x i16>
+ %2 = bitcast i16 %U to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1
+ %4 = bitcast <16 x i16> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+declare <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float>, <16 x i32>, <16 x i32>) #3
+
+define <16 x float> @test_mm512_dpbf16ps_512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm512_dpbf16ps_512:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vdpbf16ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x76,0x48,0x52,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B) #4
+ ret <16 x float> %0
+}
+
+define <16 x float> @test_mm512_maskz_dpbf16ps_512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B, i16 zeroext %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm512_maskz_dpbf16ps_512:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw 4(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdpbf16ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0x52,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_maskz_dpbf16ps_512:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdpbf16ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xc9,0x52,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B) #4
+ %1 = bitcast i16 %U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
+ ret <16 x float> %2
+}
+define <16 x float> @test_mm512_mask_dpbf16ps_512(i16 zeroext %U, <16 x float> %E, <16 x i32> %A, <16 x i32> %B) local_unnamed_addr #2 {
+; X86-LABEL: test_mm512_mask_dpbf16ps_512:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw 4(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vdpbf16ps %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0x52,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm512_mask_dpbf16ps_512:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdpbf16ps %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x49,0x52,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x float> @llvm.x86.avx512bf16.dpbf16ps.512(<16 x float> %E, <16 x i32> %A, <16 x i32> %B) #4
+ %1 = bitcast i16 %U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %E
+ ret <16 x float> %2
+}
diff --git a/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll b/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll
new file mode 100644
index 00000000000..b497ff7739d
--- /dev/null
+++ b/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll
@@ -0,0 +1,358 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bf16 -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bf16 -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
+
+declare <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float>, <4 x float>) #1
+
+define <2 x i64> @test_mm_cvtne2ps2bf16_128(<4 x float> %A, <4 x float> %B) local_unnamed_addr #0 {
+; CHECK-LABEL: test_mm_cvtne2ps2bf16_128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtne2ps2bf16 %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7f,0x08,0x72,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %A, <4 x float> %B) #2
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm_maskz_cvtne2ps2bf16_128(<4 x float> %A, <4 x float> %B, i8 zeroext %U) local_unnamed_addr #0 {
+; X86-LABEL: test_mm_maskz_cvtne2ps2bf16_128:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vcvtne2ps2bf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0x89,0x72,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm_maskz_cvtne2ps2bf16_128:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtne2ps2bf16 %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0x89,0x72,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %A, <4 x float> %B) #2
+ %1 = bitcast i8 %U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_mm_mask_cvtne2ps2bf16_128(<2 x i64> %C, i8 zeroext %U, <4 x float> %A, <4 x float> %B) local_unnamed_addr #0 {
+; X86-LABEL: test_mm_mask_cvtne2ps2bf16_128:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vcvtne2ps2bf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x09,0x72,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm_mask_cvtne2ps2bf16_128:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtne2ps2bf16 %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x77,0x09,0x72,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.128(<4 x float> %A, <4 x float> %B) #2
+ %1 = bitcast <2 x i64> %C to <8 x i16>
+ %2 = bitcast i8 %U to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1
+ %4 = bitcast <8 x i16> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+declare <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float>, <8 x float>) #3
+
+define <4 x i64> @test_mm256_cvtne2ps2bf16_256(<8 x float> %A, <8 x float> %B) local_unnamed_addr #1 {
+; CHECK-LABEL: test_mm256_cvtne2ps2bf16_256:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtne2ps2bf16 %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7f,0x28,0x72,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %A, <8 x float> %B) #4
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm256_maskz_cvtne2ps2bf16_256(<8 x float> %A, <8 x float> %B, i16 zeroext %U) local_unnamed_addr #1 {
+; X86-LABEL: test_mm256_maskz_cvtne2ps2bf16_256:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtne2ps2bf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0xa9,0x72,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_maskz_cvtne2ps2bf16_256:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtne2ps2bf16 %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7f,0xa9,0x72,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %A, <8 x float> %B) #4
+ %1 = bitcast i16 %U to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer
+ %3 = bitcast <16 x i16> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <4 x i64> @test_mm256_mask_cvtne2ps2bf16_256(<4 x i64> %C, i16 zeroext %U, <8 x float> %A, <8 x float> %B) local_unnamed_addr #1 {
+; X86-LABEL: test_mm256_mask_cvtne2ps2bf16_256:
+; X86: # %bb.0: # %entry
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vcvtne2ps2bf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x77,0x29,0x72,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_mask_cvtne2ps2bf16_256:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtne2ps2bf16 %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x77,0x29,0x72,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx512bf16.cvtne2ps2bf16.256(<8 x float> %A, <8 x float> %B) #4
+ %1 = bitcast <4 x i64> %C to <16 x i16>
+ %2 = bitcast i16 %U to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1
+ %4 = bitcast <16 x i16> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+declare <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float>) #3
+
+define <2 x i64> @test_mm256_cvtneps2bf16_256(<8 x float> %A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_cvtneps2bf16_256:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x72,0xc0]
+; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> %A) #4
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm256_maskz_cvtneps2bf16_256(<8 x float> %A, i8 zeroext %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm256_maskz_cvtneps2bf16_256:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vcvtneps2bf16 %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x72,0xc0]
+; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_maskz_cvtneps2bf16_256:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtneps2bf16 %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xa9,0x72,0xc0]
+; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> %A) #4
+ %1 = bitcast i8 %U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_mm256_mask_cvtneps2bf16_256(<2 x i64> %C, i8 zeroext %U, <8 x float> %A) local_unnamed_addr #2 {
+; X86-LABEL: test_mm256_mask_cvtneps2bf16_256:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vcvtneps2bf16 %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x72,0xc1]
+; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_mask_cvtneps2bf16_256:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtneps2bf16 %ymm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x29,0x72,0xc1]
+; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float> %A) #4
+ %1 = bitcast <2 x i64> %C to <8 x i16>
+ %2 = bitcast i8 %U to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1
+ %4 = bitcast <8 x i16> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+declare <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float>, <8 x i16>, <4 x i1>) #3
+
+define <2 x i64> @test_mm128_cvtneps2bf16_128(<4 x float> %A) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm128_cvtneps2bf16_128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x72,0xc0]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %A, <8 x i16> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) #4
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm128_maskz_cvtneps2bf16_128(<4 x float> %A, i8 zeroext %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm128_maskz_cvtneps2bf16_128:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vcvtneps2bf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x72,0xc0]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm128_maskz_cvtneps2bf16_128:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtneps2bf16 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x72,0xc0]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = bitcast i8 %U to <8 x i1>
+ %1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %A, <8 x i16> zeroinitializer, <4 x i1> %1) #4
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <2 x i64> @test_mm128_mask_cvtneps2bf16_128(<2 x i64> %C, i8 zeroext %U, <4 x float> %A) local_unnamed_addr #2 {
+; X86-LABEL: test_mm128_mask_cvtneps2bf16_128:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vcvtneps2bf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x72,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm128_mask_cvtneps2bf16_128:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtneps2bf16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x72,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = bitcast i8 %U to <8 x i1>
+ %1 = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = bitcast <2 x i64> %C to <8 x i16>
+ %3 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %A, <8 x i16> %2, <4 x i1> %1) #4
+ %4 = bitcast <8 x i16> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+; Make sure we don't fold a select into the 128 bit form of cvtneps2bf16. It
+; always writes zeros to bits 127:64 regardless of mask.
+define <2 x i64> @test_mm128_cvtneps2bf16_128_select(<2 x i64> %C, i8 zeroext %U, <4 x float> %A) local_unnamed_addr #2 {
+; X86-LABEL: test_mm128_cvtneps2bf16_128_select:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vcvtneps2bf16 %xmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x08,0x72,0xc9]
+; X86-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm128_cvtneps2bf16_128_select:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vcvtneps2bf16 %xmm1, %xmm1 # encoding: [0x62,0xf2,0x7e,0x08,0x72,0xc9]
+; X64-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = bitcast i8 %U to <8 x i1>
+ %1 = bitcast <2 x i64> %C to <8 x i16>
+ %2 = tail call <8 x i16> @llvm.x86.avx512bf16.mask.cvtneps2bf16.128(<4 x float> %A, <8 x i16> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) #4
+ %3 = select <8 x i1> %0, <8 x i16> %2, <8 x i16> %1
+ %4 = bitcast <8 x i16> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+declare <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float>, <8 x i32>, <8 x i32>) #3
+
+define <8 x float> @test_mm256_dpbf16ps_256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm256_dpbf16ps_256:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vdpbf16ps %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x76,0x28,0x52,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B) #4
+ ret <8 x float> %0
+}
+
+define <8 x float> @test_mm256_maskz_dpbf16ps_256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B, i8 zeroext %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm256_maskz_dpbf16ps_256:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vdpbf16ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xa9,0x52,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_maskz_dpbf16ps_256:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdpbf16ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0xa9,0x52,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B) #4
+ %1 = bitcast i8 %U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> zeroinitializer
+ ret <8 x float> %2
+}
+define <8 x float> @test_mm256_mask_dpbf16ps_256(i8 zeroext %U, <8 x float> %E, <8 x i32> %A, <8 x i32> %B) local_unnamed_addr #2 {
+; X86-LABEL: test_mm256_mask_dpbf16ps_256:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vdpbf16ps %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x76,0x29,0x52,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm256_mask_dpbf16ps_256:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdpbf16ps %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x76,0x29,0x52,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x float> @llvm.x86.avx512bf16.dpbf16ps.256(<8 x float> %E, <8 x i32> %A, <8 x i32> %B) #4
+ %1 = bitcast i8 %U to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x float> %0, <8 x float> %E
+ ret <8 x float> %2
+}
+
+declare <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float>, <4 x i32>, <4 x i32>) #3
+
+define <4 x float> @test_mm128_dpbf16ps_128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B) local_unnamed_addr #2 {
+; CHECK-LABEL: test_mm128_dpbf16ps_128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x76,0x08,0x52,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float> %E, <4 x i32> %A, <4x i32> %B) #4
+ ret <4 x float> %0
+}
+
+define <4 x float> @test_mm128_maskz_dpbf16ps_128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B, i4 zeroext %U) local_unnamed_addr #2 {
+; X86-LABEL: test_mm128_maskz_dpbf16ps_128:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0x89,0x52,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm128_maskz_dpbf16ps_128:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0x89,0x52,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B) #4
+ %1 = bitcast i4 %U to <4 x i1>
+ %2 = select <4 x i1> %1, <4 x float> %0, <4 x float> zeroinitializer
+ ret <4 x float> %2
+}
+define <4 x float> @test_mm128_mask_dpbf16ps_128(i4 zeroext %U, <4 x float> %E, <4 x i32> %A, <4 x i32> %B) local_unnamed_addr #2 {
+; X86-LABEL: test_mm128_mask_dpbf16ps_128:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x09,0x52,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_mm128_mask_dpbf16ps_128:
+; X64: # %bb.0: # %entry
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x09,0x52,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x float> @llvm.x86.avx512bf16.dpbf16ps.128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B) #4
+ %1 = bitcast i4 %U to <4 x i1>
+ %2 = select <4 x i1> %1, <4 x float> %0, <4 x float> %E
+ ret <4 x float> %2
+}
diff --git a/test/MC/Disassembler/X86/avx512bf16-att.txt b/test/MC/Disassembler/X86/avx512bf16-att.txt
new file mode 100644
index 00000000000..a6dc4b684c5
--- /dev/null
+++ b/test/MC/Disassembler/X86/avx512bf16-att.txt
@@ -0,0 +1,82 @@
+# RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 %zmm4, %zmm3, %zmm2
+0x62,0xf2,0x67,0x48,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %zmm4, %zmm3, %zmm2 {%k7}
+0x62,0xf2,0x67,0x4f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %zmm4, %zmm3, %zmm2 {%k7} {z}
+0x62,0xf2,0x67,0xcf,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 268435456(%esp,%esi,8), %zmm3, %zmm2
+0x62,0xf2,0x67,0x48,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+0x62,0xf2,0x67,0x4f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 (%eax){1to16}, %zmm3, %zmm2
+0x62,0xf2,0x67,0x58,0x72,0x10
+
+# CHECK: vcvtne2ps2bf16 -2048(,%ebp,2), %zmm3, %zmm2
+0x62,0xf2,0x67,0x48,0x72,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+0x62,0xf2,0x67,0xcf,0x72,0x51,0x7f
+
+# CHECK: vcvtne2ps2bf16 -512(%edx){1to16}, %zmm3, %zmm2 {%k7} {z}
+0x62,0xf2,0x67,0xdf,0x72,0x52,0x80
+
+# CHECK: vcvtneps2bf16 %zmm3, %ymm2
+0x62,0xf2,0x7e,0x48,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %zmm3, %ymm2 {%k7}
+0x62,0xf2,0x7e,0x4f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %zmm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x7e,0xcf,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 268435456(%esp,%esi,8), %ymm2
+0x62,0xf2,0x7e,0x48,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16 291(%edi,%eax,4), %ymm2 {%k7}
+0x62,0xf2,0x7e,0x4f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16 (%eax){1to16}, %ymm2
+0x62,0xf2,0x7e,0x58,0x72,0x10
+
+# CHECK: vcvtneps2bf16 -2048(,%ebp,2), %ymm2
+0x62,0xf2,0x7e,0x48,0x72,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtneps2bf16 8128(%ecx), %ymm2 {%k7} {z}
+0x62,0xf2,0x7e,0xcf,0x72,0x51,0x7f
+
+# CHECK: vcvtneps2bf16 -512(%edx){1to16}, %ymm2 {%k7} {z}
+0x62,0xf2,0x7e,0xdf,0x72,0x52,0x80
+
+# CHECK: vdpbf16ps %zmm4, %zmm3, %zmm2
+0x62,0xf2,0x66,0x48,0x52,0xd4
+
+# CHECK: vdpbf16ps %zmm4, %zmm3, %zmm2 {%k7}
+0x62,0xf2,0x66,0x4f,0x52,0xd4
+
+# CHECK: vdpbf16ps %zmm4, %zmm3, %zmm2 {%k7} {z}
+0x62,0xf2,0x66,0xcf,0x52,0xd4
+
+# CHECK: vdpbf16ps 268435456(%esp,%esi,8), %zmm3, %zmm2
+0x62,0xf2,0x66,0x48,0x52,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps 291(%edi,%eax,4), %zmm3, %zmm2 {%k7}
+0x62,0xf2,0x66,0x4f,0x52,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps (%eax){1to16}, %zmm3, %zmm2
+0x62,0xf2,0x66,0x58,0x52,0x10
+
+# CHECK: vdpbf16ps -2048(,%ebp,2), %zmm3, %zmm2
+0x62,0xf2,0x66,0x48,0x52,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vdpbf16ps 8128(%ecx), %zmm3, %zmm2 {%k7} {z}
+0x62,0xf2,0x66,0xcf,0x52,0x51,0x7f
+
+# CHECK: vdpbf16ps -512(%edx){1to16}, %zmm3, %zmm2 {%k7} {z}
+0x62,0xf2,0x66,0xdf,0x52,0x52,0x80
diff --git a/test/MC/Disassembler/X86/avx512bf16-intel.txt b/test/MC/Disassembler/X86/avx512bf16-intel.txt
new file mode 100644
index 00000000000..53d44fa7fb7
--- /dev/null
+++ b/test/MC/Disassembler/X86/avx512bf16-intel.txt
@@ -0,0 +1,82 @@
+# RUN: llvm-mc --disassemble %s -triple=i686 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 zmm2, zmm3, zmm4
+0x62,0xf2,0x67,0x48,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 zmm2 {k7}, zmm3, zmm4
+0x62,0xf2,0x67,0x4f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf2,0x67,0xcf,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x48,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x4f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 zmm2, zmm3, dword ptr [eax]{1to16}
+0x62,0xf2,0x67,0x58,0x72,0x10
+
+# CHECK: vcvtne2ps2bf16 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x67,0x48,0x72,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x67,0xcf,0x72,0x51,0x7f
+
+# CHECK: vcvtne2ps2bf16 zmm2 {k7} {z}, zmm3, dword ptr [edx - 512]{1to16}
+0x62,0xf2,0x67,0xdf,0x72,0x52,0x80
+
+# CHECK: vcvtneps2bf16 ymm2, zmm3
+0x62,0xf2,0x7e,0x48,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 ymm2 {k7}, zmm3
+0x62,0xf2,0x7e,0x4f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 ymm2 {k7} {z}, zmm3
+0x62,0xf2,0x7e,0xcf,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 ymm2, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x7e,0x48,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16 ymm2 {k7}, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x7e,0x4f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16 ymm2, dword ptr [eax]{1to16}
+0x62,0xf2,0x7e,0x58,0x72,0x10
+
+# CHECK: vcvtneps2bf16 ymm2, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x7e,0x48,0x72,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtneps2bf16 ymm2 {k7} {z}, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x7e,0xcf,0x72,0x51,0x7f
+
+# CHECK: vcvtneps2bf16 ymm2 {k7} {z}, dword ptr [edx - 512]{1to16}
+0x62,0xf2,0x7e,0xdf,0x72,0x52,0x80
+
+# CHECK: vdpbf16ps zmm2, zmm3, zmm4
+0x62,0xf2,0x66,0x48,0x52,0xd4
+
+# CHECK: vdpbf16ps zmm2 {k7}, zmm3, zmm4
+0x62,0xf2,0x66,0x4f,0x52,0xd4
+
+# CHECK: vdpbf16ps zmm2 {k7} {z}, zmm3, zmm4
+0x62,0xf2,0x66,0xcf,0x52,0xd4
+
+# CHECK: vdpbf16ps zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x48,0x52,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps zmm2 {k7}, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x4f,0x52,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps zmm2, zmm3, dword ptr [eax]{1to16}
+0x62,0xf2,0x66,0x58,0x52,0x10
+
+# CHECK: vdpbf16ps zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x66,0x48,0x52,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vdpbf16ps zmm2 {k7} {z}, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x66,0xcf,0x52,0x51,0x7f
+
+# CHECK: vdpbf16ps zmm2 {k7} {z}, zmm3, dword ptr [edx - 512]{1to16}
+0x62,0xf2,0x66,0xdf,0x52,0x52,0x80
diff --git a/test/MC/Disassembler/X86/avx512bf16vl-att.txt b/test/MC/Disassembler/X86/avx512bf16vl-att.txt
new file mode 100644
index 00000000000..ea5e84ffab4
--- /dev/null
+++ b/test/MC/Disassembler/X86/avx512bf16vl-att.txt
@@ -0,0 +1,157 @@
+# RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 %ymm4, %ymm3, %ymm2
+0x62,0xf2,0x67,0x28,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %ymm4, %ymm3, %ymm2 {%k7}
+0x62,0xf2,0x67,0x2f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %ymm4, %ymm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x67,0xaf,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %xmm4, %xmm3, %xmm2
+0x62,0xf2,0x67,0x08,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %xmm4, %xmm3, %xmm2 {%k7}
+0x62,0xf2,0x67,0x0f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 %xmm4, %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x67,0x8f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 268435456(%esp,%esi,8), %ymm3, %ymm2
+0x62,0xf2,0x67,0x28,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+0x62,0xf2,0x67,0x2f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 (%eax){1to8}, %ymm3, %ymm2
+0x62,0xf2,0x67,0x38,0x72,0x10
+
+# CHECK: vcvtne2ps2bf16 -1024(,%ebp,2), %ymm3, %ymm2
+0x62,0xf2,0x67,0x28,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x67,0xaf,0x72,0x51,0x7f
+
+# CHECK: vcvtne2ps2bf16 -512(%edx){1to8}, %ymm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x67,0xbf,0x72,0x52,0x80
+
+# CHECK: vcvtne2ps2bf16 268435456(%esp,%esi,8), %xmm3, %xmm2
+0x62,0xf2,0x67,0x08,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+0x62,0xf2,0x67,0x0f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 (%eax){1to4}, %xmm3, %xmm2
+0x62,0xf2,0x67,0x18,0x72,0x10
+
+# CHECK: vcvtne2ps2bf16 -512(,%ebp,2), %xmm3, %xmm2
+0x62,0xf2,0x67,0x08,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x67,0x8f,0x72,0x51,0x7f
+
+# CHECK: vcvtne2ps2bf16 -512(%edx){1to4}, %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x67,0x9f,0x72,0x52,0x80
+
+# CHECK: vcvtneps2bf16 %xmm3, %xmm2
+0x62,0xf2,0x7e,0x08,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %xmm3, %xmm2 {%k7}
+0x62,0xf2,0x7e,0x0f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x7e,0x8f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %ymm3, %xmm2
+0x62,0xf2,0x7e,0x28,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %ymm3, %xmm2 {%k7}
+0x62,0xf2,0x7e,0x2f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 %ymm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x7e,0xaf,0x72,0xd3
+
+# CHECK: vcvtneps2bf16x 268435456(%esp,%esi,8), %xmm2
+0x62,0xf2,0x7e,0x08,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16x 291(%edi,%eax,4), %xmm2 {%k7}
+0x62,0xf2,0x7e,0x0f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16 (%eax){1to4}, %xmm2
+0x62,0xf2,0x7e,0x18,0x72,0x10
+
+# CHECK: vcvtneps2bf16x -512(,%ebp,2), %xmm2
+0x62,0xf2,0x7e,0x08,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtneps2bf16x 2032(%ecx), %xmm2 {%k7} {z}
+0x62,0xf2,0x7e,0x8f,0x72,0x51,0x7f
+
+# CHECK: vcvtneps2bf16 -512(%edx){1to4}, %xmm2 {%k7} {z}
+0x62,0xf2,0x7e,0x9f,0x72,0x52,0x80
+
+# CHECK: vcvtneps2bf16 (%eax){1to8}, %xmm2
+0x62,0xf2,0x7e,0x38,0x72,0x10
+
+# CHECK: vcvtneps2bf16y -1024(,%ebp,2), %xmm2
+0x62,0xf2,0x7e,0x28,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtneps2bf16y 4064(%ecx), %xmm2 {%k7} {z}
+0x62,0xf2,0x7e,0xaf,0x72,0x51,0x7f
+
+# CHECK: vcvtneps2bf16 -512(%edx){1to8}, %xmm2 {%k7} {z}
+0x62,0xf2,0x7e,0xbf,0x72,0x52,0x80
+
+# CHECK: vdpbf16ps %ymm4, %ymm3, %ymm2
+0x62,0xf2,0x66,0x28,0x52,0xd4
+
+# CHECK: vdpbf16ps %ymm4, %ymm3, %ymm2 {%k7}
+0x62,0xf2,0x66,0x2f,0x52,0xd4
+
+# CHECK: vdpbf16ps %ymm4, %ymm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x66,0xaf,0x52,0xd4
+
+# CHECK: vdpbf16ps %xmm4, %xmm3, %xmm2
+0x62,0xf2,0x66,0x08,0x52,0xd4
+
+# CHECK: vdpbf16ps %xmm4, %xmm3, %xmm2 {%k7}
+0x62,0xf2,0x66,0x0f,0x52,0xd4
+
+# CHECK: vdpbf16ps %xmm4, %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x66,0x8f,0x52,0xd4
+
+# CHECK: vdpbf16ps 268435456(%esp,%esi,8), %ymm3, %ymm2
+0x62,0xf2,0x66,0x28,0x52,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps 291(%edi,%eax,4), %ymm3, %ymm2 {%k7}
+0x62,0xf2,0x66,0x2f,0x52,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps (%eax){1to8}, %ymm3, %ymm2
+0x62,0xf2,0x66,0x38,0x52,0x10
+
+# CHECK: vdpbf16ps -1024(,%ebp,2), %ymm3, %ymm2
+0x62,0xf2,0x66,0x28,0x52,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vdpbf16ps 4064(%ecx), %ymm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x66,0xaf,0x52,0x51,0x7f
+
+# CHECK: vdpbf16ps -512(%edx){1to8}, %ymm3, %ymm2 {%k7} {z}
+0x62,0xf2,0x66,0xbf,0x52,0x52,0x80
+
+# CHECK: vdpbf16ps 268435456(%esp,%esi,8), %xmm3, %xmm2
+0x62,0xf2,0x66,0x08,0x52,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps 291(%edi,%eax,4), %xmm3, %xmm2 {%k7}
+0x62,0xf2,0x66,0x0f,0x52,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps (%eax){1to4}, %xmm3, %xmm2
+0x62,0xf2,0x66,0x18,0x52,0x10
+
+# CHECK: vdpbf16ps -512(,%ebp,2), %xmm3, %xmm2
+0x62,0xf2,0x66,0x08,0x52,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vdpbf16ps 2032(%ecx), %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x66,0x8f,0x52,0x51,0x7f
+
+# CHECK: vdpbf16ps -512(%edx){1to4}, %xmm3, %xmm2 {%k7} {z}
+0x62,0xf2,0x66,0x9f,0x52,0x52,0x80
diff --git a/test/MC/Disassembler/X86/avx512bf16vl-intel.txt b/test/MC/Disassembler/X86/avx512bf16vl-intel.txt
new file mode 100644
index 00000000000..3d9dcb1879d
--- /dev/null
+++ b/test/MC/Disassembler/X86/avx512bf16vl-intel.txt
@@ -0,0 +1,157 @@
+# RUN: llvm-mc --disassemble %s -triple=i686 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 ymm2, ymm3, ymm4
+0x62,0xf2,0x67,0x28,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 ymm2 {k7}, ymm3, ymm4
+0x62,0xf2,0x67,0x2f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf2,0x67,0xaf,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 xmm2, xmm3, xmm4
+0x62,0xf2,0x67,0x08,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 xmm2 {k7}, xmm3, xmm4
+0x62,0xf2,0x67,0x0f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf2,0x67,0x8f,0x72,0xd4
+
+# CHECK: vcvtne2ps2bf16 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x28,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x2f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 ymm2, ymm3, dword ptr [eax]{1to8}
+0x62,0xf2,0x67,0x38,0x72,0x10
+
+# CHECK: vcvtne2ps2bf16 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x67,0x28,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x67,0xaf,0x72,0x51,0x7f
+
+# CHECK: vcvtne2ps2bf16 ymm2 {k7} {z}, ymm3, dword ptr [edx - 512]{1to8}
+0x62,0xf2,0x67,0xbf,0x72,0x52,0x80
+
+# CHECK: vcvtne2ps2bf16 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x08,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x0f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 xmm2, xmm3, dword ptr [eax]{1to4}
+0x62,0xf2,0x67,0x18,0x72,0x10
+
+# CHECK: vcvtne2ps2bf16 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x67,0x08,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x67,0x8f,0x72,0x51,0x7f
+
+# CHECK: vcvtne2ps2bf16 xmm2 {k7} {z}, xmm3, dword ptr [edx - 512]{1to4}
+0x62,0xf2,0x67,0x9f,0x72,0x52,0x80
+
+# CHECK: vcvtneps2bf16 xmm2, xmm3
+0x62,0xf2,0x7e,0x08,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 xmm2 {k7}, xmm3
+0x62,0xf2,0x7e,0x0f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 xmm2 {k7} {z}, xmm3
+0x62,0xf2,0x7e,0x8f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 xmm2, ymm3
+0x62,0xf2,0x7e,0x28,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 xmm2 {k7}, ymm3
+0x62,0xf2,0x7e,0x2f,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 xmm2 {k7} {z}, ymm3
+0x62,0xf2,0x7e,0xaf,0x72,0xd3
+
+# CHECK: vcvtneps2bf16 xmm2, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x7e,0x08,0x72,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16 xmm2 {k7}, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x7e,0x0f,0x72,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16 xmm2, dword ptr [eax]{1to4}
+0x62,0xf2,0x7e,0x18,0x72,0x10
+
+# CHECK: vcvtneps2bf16 xmm2, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x7e,0x08,0x72,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtneps2bf16 xmm2 {k7} {z}, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x7e,0x8f,0x72,0x51,0x7f
+
+# CHECK: vcvtneps2bf16 xmm2 {k7} {z}, dword ptr [edx - 512]{1to4}
+0x62,0xf2,0x7e,0x9f,0x72,0x52,0x80
+
+# CHECK: vcvtneps2bf16 xmm2, dword ptr [eax]{1to8}
+0x62,0xf2,0x7e,0x38,0x72,0x10
+
+# CHECK: vcvtneps2bf16 xmm2, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x7e,0x28,0x72,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtneps2bf16 xmm2 {k7} {z}, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x7e,0xaf,0x72,0x51,0x7f
+
+# CHECK: vcvtneps2bf16 xmm2 {k7} {z}, dword ptr [edx - 512]{1to8}
+0x62,0xf2,0x7e,0xbf,0x72,0x52,0x80
+
+# CHECK: vdpbf16ps ymm2, ymm3, ymm4
+0x62,0xf2,0x66,0x28,0x52,0xd4
+
+# CHECK: vdpbf16ps ymm2 {k7}, ymm3, ymm4
+0x62,0xf2,0x66,0x2f,0x52,0xd4
+
+# CHECK: vdpbf16ps ymm2 {k7} {z}, ymm3, ymm4
+0x62,0xf2,0x66,0xaf,0x52,0xd4
+
+# CHECK: vdpbf16ps xmm2, xmm3, xmm4
+0x62,0xf2,0x66,0x08,0x52,0xd4
+
+# CHECK: vdpbf16ps xmm2 {k7}, xmm3, xmm4
+0x62,0xf2,0x66,0x0f,0x52,0xd4
+
+# CHECK: vdpbf16ps xmm2 {k7} {z}, xmm3, xmm4
+0x62,0xf2,0x66,0x8f,0x52,0xd4
+
+# CHECK: vdpbf16ps ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x28,0x52,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps ymm2 {k7}, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x2f,0x52,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps ymm2, ymm3, dword ptr [eax]{1to8}
+0x62,0xf2,0x66,0x38,0x52,0x10
+
+# CHECK: vdpbf16ps ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x66,0x28,0x52,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vdpbf16ps ymm2 {k7} {z}, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x66,0xaf,0x52,0x51,0x7f
+
+# CHECK: vdpbf16ps ymm2 {k7} {z}, ymm3, dword ptr [edx - 512]{1to8}
+0x62,0xf2,0x66,0xbf,0x52,0x52,0x80
+
+# CHECK: vdpbf16ps xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x08,0x52,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps xmm2 {k7}, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x0f,0x52,0x94,0x87,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps xmm2, xmm3, dword ptr [eax]{1to4}
+0x62,0xf2,0x66,0x18,0x52,0x10
+
+# CHECK: vdpbf16ps xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x66,0x08,0x52,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vdpbf16ps xmm2 {k7} {z}, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x66,0x8f,0x52,0x51,0x7f
+
+# CHECK: vdpbf16ps xmm2 {k7} {z}, xmm3, dword ptr [edx - 512]{1to4}
+0x62,0xf2,0x66,0x9f,0x52,0x52,0x80
diff --git a/test/MC/Disassembler/X86/x86-64-avx512bf16-att.txt b/test/MC/Disassembler/X86/x86-64-avx512bf16-att.txt
new file mode 100644
index 00000000000..2b633a9a7cf
--- /dev/null
+++ b/test/MC/Disassembler/X86/x86-64-avx512bf16-att.txt
@@ -0,0 +1,82 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 %zmm24, %zmm23, %zmm22
+0x62,0x82,0x47,0x40,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %zmm24, %zmm23, %zmm22 {%k7}
+0x62,0x82,0x47,0x47,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %zmm24, %zmm23, %zmm22 {%k7} {z}
+0x62,0x82,0x47,0xc7,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 268435456(%rbp,%r14,8), %zmm23, %zmm22
+0x62,0xa2,0x47,0x40,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+0x62,0xc2,0x47,0x47,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 (%rip){1to16}, %zmm23, %zmm22
+0x62,0xe2,0x47,0x50,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 -2048(,%rbp,2), %zmm23, %zmm22
+0x62,0xe2,0x47,0x40,0x72,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+0x62,0xe2,0x47,0xc7,0x72,0x71,0x7f
+
+# CHECK: vcvtne2ps2bf16 -512(%rdx){1to16}, %zmm23, %zmm22 {%k7} {z}
+0x62,0xe2,0x47,0xd7,0x72,0x72,0x80
+
+# CHECK: vcvtneps2bf16 %zmm23, %ymm22
+0x62,0xa2,0x7e,0x48,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %zmm23, %ymm22 {%k7}
+0x62,0xa2,0x7e,0x4f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %zmm23, %ymm22 {%k7} {z}
+0x62,0xa2,0x7e,0xcf,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 268435456(%rbp,%r14,8), %ymm22
+0x62,0xa2,0x7e,0x48,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16 291(%r8,%rax,4), %ymm22 {%k7}
+0x62,0xc2,0x7e,0x4f,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16 (%rip){1to16}, %ymm22
+0x62,0xe2,0x7e,0x58,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtneps2bf16 -2048(,%rbp,2), %ymm22
+0x62,0xe2,0x7e,0x48,0x72,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtneps2bf16 8128(%rcx), %ymm22 {%k7} {z}
+0x62,0xe2,0x7e,0xcf,0x72,0x71,0x7f
+
+# CHECK: vcvtneps2bf16 -512(%rdx){1to16}, %ymm22 {%k7} {z}
+0x62,0xe2,0x7e,0xdf,0x72,0x72,0x80
+
+# CHECK: vdpbf16ps %zmm24, %zmm23, %zmm22
+0x62,0x82,0x46,0x40,0x52,0xf0
+
+# CHECK: vdpbf16ps %zmm24, %zmm23, %zmm22 {%k7}
+0x62,0x82,0x46,0x47,0x52,0xf0
+
+# CHECK: vdpbf16ps %zmm24, %zmm23, %zmm22 {%k7} {z}
+0x62,0x82,0x46,0xc7,0x52,0xf0
+
+# CHECK: vdpbf16ps 268435456(%rbp,%r14,8), %zmm23, %zmm22
+0x62,0xa2,0x46,0x40,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps 291(%r8,%rax,4), %zmm23, %zmm22 {%k7}
+0x62,0xc2,0x46,0x47,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps (%rip){1to16}, %zmm23, %zmm22
+0x62,0xe2,0x46,0x50,0x52,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vdpbf16ps -2048(,%rbp,2), %zmm23, %zmm22
+0x62,0xe2,0x46,0x40,0x52,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vdpbf16ps 8128(%rcx), %zmm23, %zmm22 {%k7} {z}
+0x62,0xe2,0x46,0xc7,0x52,0x71,0x7f
+
+# CHECK: vdpbf16ps -512(%rdx){1to16}, %zmm23, %zmm22 {%k7} {z}
+0x62,0xe2,0x46,0xd7,0x52,0x72,0x80
diff --git a/test/MC/Disassembler/X86/x86-64-avx512bf16-intel.txt b/test/MC/Disassembler/X86/x86-64-avx512bf16-intel.txt
new file mode 100644
index 00000000000..8bb3be91aed
--- /dev/null
+++ b/test/MC/Disassembler/X86/x86-64-avx512bf16-intel.txt
@@ -0,0 +1,83 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 zmm22, zmm23, zmm24
+0x62,0x82,0x47,0x40,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 zmm22 {k7}, zmm23, zmm24
+0x62,0x82,0x47,0x47,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x82,0x47,0xc7,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x40,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x47,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 zmm22, zmm23, dword ptr [rip]{1to16}
+0x62,0xe2,0x47,0x50,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x47,0x40,0x72,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x47,0xc7,0x72,0x71,0x7f
+
+# CHECK: vcvtne2ps2bf16 zmm22 {k7} {z}, zmm23, dword ptr [rdx - 512]{1to16}
+0x62,0xe2,0x47,0xd7,0x72,0x72,0x80
+
+# CHECK: vcvtneps2bf16 ymm22, zmm23
+0x62,0xa2,0x7e,0x48,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 ymm22 {k7}, zmm23
+0x62,0xa2,0x7e,0x4f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 ymm22 {k7} {z}, zmm23
+0x62,0xa2,0x7e,0xcf,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 ymm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x7e,0x48,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16 ymm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x7e,0x4f,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16 ymm22, dword ptr [rip]{1to16}
+0x62,0xe2,0x7e,0x58,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtneps2bf16 ymm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x7e,0x48,0x72,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vcvtneps2bf16 ymm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x7e,0xcf,0x72,0x71,0x7f
+
+# CHECK: vcvtneps2bf16 ymm22 {k7} {z}, dword ptr [rdx - 512]{1to16}
+0x62,0xe2,0x7e,0xdf,0x72,0x72,0x80
+
+# CHECK: vdpbf16ps zmm22, zmm23, zmm24
+0x62,0x82,0x46,0x40,0x52,0xf0
+
+# CHECK: vdpbf16ps zmm22 {k7}, zmm23, zmm24
+0x62,0x82,0x46,0x47,0x52,0xf0
+
+# CHECK: vdpbf16ps zmm22 {k7} {z}, zmm23, zmm24
+0x62,0x82,0x46,0xc7,0x52,0xf0
+
+# CHECK: vdpbf16ps zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x40,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps zmm22 {k7}, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x47,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps zmm22, zmm23, dword ptr [rip]{1to16}
+0x62,0xe2,0x46,0x50,0x52,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vdpbf16ps zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x46,0x40,0x52,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# CHECK: vdpbf16ps zmm22 {k7} {z}, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x46,0xc7,0x52,0x71,0x7f
+
+# CHECK: vdpbf16ps zmm22 {k7} {z}, zmm23, dword ptr [rdx - 512]{1to16}
+0x62,0xe2,0x46,0xd7,0x52,0x72,0x80
+
diff --git a/test/MC/Disassembler/X86/x86-64-avx512bf16vl-att.txt b/test/MC/Disassembler/X86/x86-64-avx512bf16vl-att.txt
new file mode 100644
index 00000000000..e2bfc98b13a
--- /dev/null
+++ b/test/MC/Disassembler/X86/x86-64-avx512bf16vl-att.txt
@@ -0,0 +1,158 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 %ymm24, %ymm23, %ymm22
+0x62,0x82,0x47,0x20,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %ymm24, %ymm23, %ymm22 {%k7}
+0x62,0x82,0x47,0x27,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %ymm24, %ymm23, %ymm22 {%k7} {z}
+0x62,0x82,0x47,0xa7,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %xmm24, %xmm23, %xmm22
+0x62,0x82,0x47,0x00,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %xmm24, %xmm23, %xmm22 {%k7}
+0x62,0x82,0x47,0x07,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 %xmm24, %xmm23, %xmm22 {%k7} {z}
+0x62,0x82,0x47,0x87,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 268435456(%rbp,%r14,8), %ymm23, %ymm22
+0x62,0xa2,0x47,0x20,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+0x62,0xc2,0x47,0x27,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 (%rip){1to8}, %ymm23, %ymm22
+0x62,0xe2,0x47,0x30,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 -1024(,%rbp,2), %ymm23, %ymm22
+0x62,0xe2,0x47,0x20,0x72,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+0x62,0xe2,0x47,0xa7,0x72,0x71,0x7f
+
+# CHECK: vcvtne2ps2bf16 -512(%rdx){1to8}, %ymm23, %ymm22 {%k7} {z}
+0x62,0xe2,0x47,0xb7,0x72,0x72,0x80
+
+# CHECK: vcvtne2ps2bf16 268435456(%rbp,%r14,8), %xmm23, %xmm22
+0x62,0xa2,0x47,0x00,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+0x62,0xc2,0x47,0x07,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 (%rip){1to4}, %xmm23, %xmm22
+0x62,0xe2,0x47,0x10,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 -512(,%rbp,2), %xmm23, %xmm22
+0x62,0xe2,0x47,0x00,0x72,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+0x62,0xe2,0x47,0x87,0x72,0x71,0x7f
+
+# CHECK: vcvtne2ps2bf16 -512(%rdx){1to4}, %xmm23, %xmm22 {%k7} {z}
+0x62,0xe2,0x47,0x97,0x72,0x72,0x80
+
+# CHECK: vcvtneps2bf16 %xmm23, %xmm22
+0x62,0xa2,0x7e,0x08,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %xmm23, %xmm22 {%k7}
+0x62,0xa2,0x7e,0x0f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %xmm23, %xmm22 {%k7} {z}
+0x62,0xa2,0x7e,0x8f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %ymm23, %xmm22
+0x62,0xa2,0x7e,0x28,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %ymm23, %xmm22 {%k7}
+0x62,0xa2,0x7e,0x2f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 %ymm23, %xmm22 {%k7} {z}
+0x62,0xa2,0x7e,0xaf,0x72,0xf7
+
+# CHECK: vcvtneps2bf16x 268435456(%rbp,%r14,8), %xmm22
+0x62,0xa2,0x7e,0x08,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16x 291(%r8,%rax,4), %xmm22 {%k7}
+0x62,0xc2,0x7e,0x0f,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16 (%rip){1to4}, %xmm22
+0x62,0xe2,0x7e,0x18,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtneps2bf16x -512(,%rbp,2), %xmm22
+0x62,0xe2,0x7e,0x08,0x72,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtneps2bf16x 2032(%rcx), %xmm22 {%k7} {z}
+0x62,0xe2,0x7e,0x8f,0x72,0x71,0x7f
+
+# CHECK: vcvtneps2bf16 -512(%rdx){1to4}, %xmm22 {%k7} {z}
+0x62,0xe2,0x7e,0x9f,0x72,0x72,0x80
+
+# CHECK: vcvtneps2bf16 (%rip){1to8}, %xmm22
+0x62,0xe2,0x7e,0x38,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtneps2bf16y -1024(,%rbp,2), %xmm22
+0x62,0xe2,0x7e,0x28,0x72,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtneps2bf16y 4064(%rcx), %xmm22 {%k7} {z}
+0x62,0xe2,0x7e,0xaf,0x72,0x71,0x7f
+
+# CHECK: vcvtneps2bf16 -512(%rdx){1to8}, %xmm22 {%k7} {z}
+0x62,0xe2,0x7e,0xbf,0x72,0x72,0x80
+
+# CHECK: vdpbf16ps %ymm24, %ymm23, %ymm22
+0x62,0x82,0x46,0x20,0x52,0xf0
+
+# CHECK: vdpbf16ps %ymm24, %ymm23, %ymm22 {%k7}
+0x62,0x82,0x46,0x27,0x52,0xf0
+
+# CHECK: vdpbf16ps %ymm24, %ymm23, %ymm22 {%k7} {z}
+0x62,0x82,0x46,0xa7,0x52,0xf0
+
+# CHECK: vdpbf16ps %xmm24, %xmm23, %xmm22
+0x62,0x82,0x46,0x00,0x52,0xf0
+
+# CHECK: vdpbf16ps %xmm24, %xmm23, %xmm22 {%k7}
+0x62,0x82,0x46,0x07,0x52,0xf0
+
+# CHECK: vdpbf16ps %xmm24, %xmm23, %xmm22 {%k7} {z}
+0x62,0x82,0x46,0x87,0x52,0xf0
+
+# CHECK: vdpbf16ps 268435456(%rbp,%r14,8), %ymm23, %ymm22
+0x62,0xa2,0x46,0x20,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps 291(%r8,%rax,4), %ymm23, %ymm22 {%k7}
+0x62,0xc2,0x46,0x27,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps (%rip){1to8}, %ymm23, %ymm22
+0x62,0xe2,0x46,0x30,0x52,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vdpbf16ps -1024(,%rbp,2), %ymm23, %ymm22
+0x62,0xe2,0x46,0x20,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vdpbf16ps 4064(%rcx), %ymm23, %ymm22 {%k7} {z}
+0x62,0xe2,0x46,0xa7,0x52,0x71,0x7f
+
+# CHECK: vdpbf16ps -512(%rdx){1to8}, %ymm23, %ymm22 {%k7} {z}
+0x62,0xe2,0x46,0xb7,0x52,0x72,0x80
+
+# CHECK: vdpbf16ps 268435456(%rbp,%r14,8), %xmm23, %xmm22
+0x62,0xa2,0x46,0x00,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps 291(%r8,%rax,4), %xmm23, %xmm22 {%k7}
+0x62,0xc2,0x46,0x07,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps (%rip){1to4}, %xmm23, %xmm22
+0x62,0xe2,0x46,0x10,0x52,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vdpbf16ps -512(,%rbp,2), %xmm23, %xmm22
+0x62,0xe2,0x46,0x00,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vdpbf16ps 2032(%rcx), %xmm23, %xmm22 {%k7} {z}
+0x62,0xe2,0x46,0x87,0x52,0x71,0x7f
+
+# CHECK: vdpbf16ps -512(%rdx){1to4}, %xmm23, %xmm22 {%k7} {z}
+0x62,0xe2,0x46,0x97,0x52,0x72,0x80
+
diff --git a/test/MC/Disassembler/X86/x86-64-avx512bf16vl-intel.txt b/test/MC/Disassembler/X86/x86-64-avx512bf16vl-intel.txt
new file mode 100644
index 00000000000..4def95ed899
--- /dev/null
+++ b/test/MC/Disassembler/X86/x86-64-avx512bf16vl-intel.txt
@@ -0,0 +1,158 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s
+
+# CHECK: vcvtne2ps2bf16 ymm22, ymm23, ymm24
+0x62,0x82,0x47,0x20,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 ymm22 {k7}, ymm23, ymm24
+0x62,0x82,0x47,0x27,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x82,0x47,0xa7,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 xmm22, xmm23, xmm24
+0x62,0x82,0x47,0x00,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 xmm22 {k7}, xmm23, xmm24
+0x62,0x82,0x47,0x07,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x82,0x47,0x87,0x72,0xf0
+
+# CHECK: vcvtne2ps2bf16 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x20,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x27,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 ymm22, ymm23, dword ptr [rip]{1to8}
+0x62,0xe2,0x47,0x30,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x47,0x20,0x72,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x47,0xa7,0x72,0x71,0x7f
+
+# CHECK: vcvtne2ps2bf16 ymm22 {k7} {z}, ymm23, dword ptr [rdx - 512]{1to8}
+0x62,0xe2,0x47,0xb7,0x72,0x72,0x80
+
+# CHECK: vcvtne2ps2bf16 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x00,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtne2ps2bf16 xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x07,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 xmm22, xmm23, dword ptr [rip]{1to4}
+0x62,0xe2,0x47,0x10,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtne2ps2bf16 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x47,0x00,0x72,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtne2ps2bf16 xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x47,0x87,0x72,0x71,0x7f
+
+# CHECK: vcvtne2ps2bf16 xmm22 {k7} {z}, xmm23, dword ptr [rdx - 512]{1to4}
+0x62,0xe2,0x47,0x97,0x72,0x72,0x80
+
+# CHECK: vcvtneps2bf16 xmm22, xmm23
+0x62,0xa2,0x7e,0x08,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 xmm22 {k7}, xmm23
+0x62,0xa2,0x7e,0x0f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 xmm22 {k7} {z}, xmm23
+0x62,0xa2,0x7e,0x8f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 xmm22, ymm23
+0x62,0xa2,0x7e,0x28,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 xmm22 {k7}, ymm23
+0x62,0xa2,0x7e,0x2f,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 xmm22 {k7} {z}, ymm23
+0x62,0xa2,0x7e,0xaf,0x72,0xf7
+
+# CHECK: vcvtneps2bf16 xmm22, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x7e,0x08,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vcvtneps2bf16 xmm22 {k7}, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x7e,0x0f,0x72,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vcvtneps2bf16 xmm22, dword ptr [rip]{1to4}
+0x62,0xe2,0x7e,0x18,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtneps2bf16 xmm22, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x7e,0x08,0x72,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vcvtneps2bf16 xmm22 {k7} {z}, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x7e,0x8f,0x72,0x71,0x7f
+
+# CHECK: vcvtneps2bf16 xmm22 {k7} {z}, dword ptr [rdx - 512]{1to4}
+0x62,0xe2,0x7e,0x9f,0x72,0x72,0x80
+
+# CHECK: vcvtneps2bf16 xmm22, dword ptr [rip]{1to8}
+0x62,0xe2,0x7e,0x38,0x72,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vcvtneps2bf16 xmm22, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x7e,0x28,0x72,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vcvtneps2bf16 xmm22 {k7} {z}, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x7e,0xaf,0x72,0x71,0x7f
+
+# CHECK: vcvtneps2bf16 xmm22 {k7} {z}, dword ptr [rdx - 512]{1to8}
+0x62,0xe2,0x7e,0xbf,0x72,0x72,0x80
+
+# CHECK: vdpbf16ps ymm22, ymm23, ymm24
+0x62,0x82,0x46,0x20,0x52,0xf0
+
+# CHECK: vdpbf16ps ymm22 {k7}, ymm23, ymm24
+0x62,0x82,0x46,0x27,0x52,0xf0
+
+# CHECK: vdpbf16ps ymm22 {k7} {z}, ymm23, ymm24
+0x62,0x82,0x46,0xa7,0x52,0xf0
+
+# CHECK: vdpbf16ps xmm22, xmm23, xmm24
+0x62,0x82,0x46,0x00,0x52,0xf0
+
+# CHECK: vdpbf16ps xmm22 {k7}, xmm23, xmm24
+0x62,0x82,0x46,0x07,0x52,0xf0
+
+# CHECK: vdpbf16ps xmm22 {k7} {z}, xmm23, xmm24
+0x62,0x82,0x46,0x87,0x52,0xf0
+
+# CHECK: vdpbf16ps ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x20,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps ymm22 {k7}, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x27,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps ymm22, ymm23, dword ptr [rip]{1to8}
+0x62,0xe2,0x46,0x30,0x52,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vdpbf16ps ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x46,0x20,0x52,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# CHECK: vdpbf16ps ymm22 {k7} {z}, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x46,0xa7,0x52,0x71,0x7f
+
+# CHECK: vdpbf16ps ymm22 {k7} {z}, ymm23, dword ptr [rdx - 512]{1to8}
+0x62,0xe2,0x46,0xb7,0x52,0x72,0x80
+
+# CHECK: vdpbf16ps xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x00,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# CHECK: vdpbf16ps xmm22 {k7}, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x07,0x52,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# CHECK: vdpbf16ps xmm22, xmm23, dword ptr [rip]{1to4}
+0x62,0xe2,0x46,0x10,0x52,0x35,0x00,0x00,0x00,0x00
+
+# CHECK: vdpbf16ps xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x46,0x00,0x52,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# CHECK: vdpbf16ps xmm22 {k7} {z}, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x46,0x87,0x52,0x71,0x7f
+
+# CHECK: vdpbf16ps xmm22 {k7} {z}, xmm23, dword ptr [rdx - 512]{1to4}
+0x62,0xe2,0x46,0x97,0x52,0x72,0x80
+
diff --git a/test/MC/X86/avx512_bf16-encoding.s b/test/MC/X86/avx512_bf16-encoding.s
new file mode 100644
index 00000000000..675bc33dceb
--- /dev/null
+++ b/test/MC/X86/avx512_bf16-encoding.s
@@ -0,0 +1,90 @@
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding < %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xf4]
+ vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x4f,0x72,0xf4]
+ vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x57,0xcf,0x72,0xf4]
+ vcvtne2ps2bf16 %zmm4, %zmm5, %zmm6 {%k7} {z}
+
+// CHECK: vcvtne2ps2bf16 (%ecx), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x31]
+ vcvtne2ps2bf16 (%ecx), %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16 291(%esp,%esi,8), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00]
+ vcvtne2ps2bf16 291(%esp,%esi,8), %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16 268435456(%esp,%esi,8), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtne2ps2bf16 268435456(%esp,%esi,8), %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16 -64(%esp), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x74,0x24,0xff]
+ vcvtne2ps2bf16 -64(%esp), %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16 (%eax){1to16}, %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x30]
+ vcvtne2ps2bf16 (%eax){1to16}, %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16 8128(%edx), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 8128(%edx), %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16 -8192(%edx), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x72,0x80]
+ vcvtne2ps2bf16 -8192(%edx), %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16 508(%edx){1to16}, %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 508(%edx){1to16}, %zmm5, %zmm6
+
+// CHECK: vcvtne2ps2bf16 -512(%edx){1to16}, %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x72,0x80]
+ vcvtne2ps2bf16 -512(%edx){1to16}, %zmm5, %zmm6
+
+// CHECK: vcvtneps2bf16 %zmm5, %ymm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x72,0xf5]
+ vcvtneps2bf16 %zmm5, %ymm6
+
+// CHECK: vcvtneps2bf16 268435456(%esp,%esi,8), %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x4f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtneps2bf16 268435456(%esp,%esi,8), %ymm6 {%k7}
+
+// CHECK: vcvtneps2bf16 (%ecx){1to16}, %ymm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x58,0x72,0x31]
+ vcvtneps2bf16 (%ecx){1to16}, %ymm6
+
+// CHECK: vcvtneps2bf16 8128(%ecx), %ymm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x72,0x71,0x7f]
+ vcvtneps2bf16 8128(%ecx), %ymm6
+
+// CHECK: vcvtneps2bf16 -512(%edx){1to16}, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x7e,0xdf,0x72,0x72,0x80]
+ vcvtneps2bf16 -512(%edx){1to16}, %ymm6 {%k7} {z}
+
+// CHECK: vdpbf16ps %zmm4, %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x48,0x52,0xf4]
+ vdpbf16ps %zmm4, %zmm5, %zmm6
+
+// CHECK: vdpbf16ps 268435456(%esp,%esi,8), %zmm5, %zmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x56,0x4f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vdpbf16ps 268435456(%esp,%esi,8), %zmm5, %zmm6 {%k7}
+
+// CHECK: vdpbf16ps (%ecx){1to16}, %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x58,0x52,0x31]
+ vdpbf16ps (%ecx){1to16}, %zmm5, %zmm6
+
+// CHECK: vdpbf16ps 8128(%ecx), %zmm5, %zmm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x48,0x52,0x71,0x7f]
+ vdpbf16ps 8128(%ecx), %zmm5, %zmm6
+
+// CHECK: vdpbf16ps -512(%edx){1to16}, %zmm5, %zmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x56,0xdf,0x52,0x72,0x80]
+ vdpbf16ps -512(%edx){1to16}, %zmm5, %zmm6 {%k7} {z}
+
diff --git a/test/MC/X86/avx512_bf16_vl-encoding.s b/test/MC/X86/avx512_bf16_vl-encoding.s
new file mode 100644
index 00000000000..ffa42e25ed1
--- /dev/null
+++ b/test/MC/X86/avx512_bf16_vl-encoding.s
@@ -0,0 +1,170 @@
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding < %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 %xmm4, %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xf4]
+ vcvtne2ps2bf16 %xmm4, %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %xmm4, %xmm5, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x57,0x8f,0x72,0xf4]
+ vcvtne2ps2bf16 %xmm4, %xmm5, %xmm6 {%k7} {z}
+
+// CHECK: vcvtne2ps2bf16 (%ecx), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x31]
+ vcvtne2ps2bf16 (%ecx), %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 291(%esp,%esi,8), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00]
+ vcvtne2ps2bf16 291(%esp,%esi,8), %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtne2ps2bf16 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 -16(%esp), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x74,0x24,0xff]
+ vcvtne2ps2bf16 -16(%esp), %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 (%eax){1to4}, %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x30]
+ vcvtne2ps2bf16 (%eax){1to4}, %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 2032(%edx), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 2032(%edx), %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 -2048(%edx), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x72,0x80]
+ vcvtne2ps2bf16 -2048(%edx), %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 508(%edx){1to4}, %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 508(%edx){1to4}, %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 -512(%edx){1to4}, %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x72,0x80]
+ vcvtne2ps2bf16 -512(%edx){1to4}, %xmm5, %xmm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %ymm4, %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xf4]
+ vcvtne2ps2bf16 %ymm4, %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %ymm4, %ymm5, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x57,0xaf,0x72,0xf4]
+ vcvtne2ps2bf16 %ymm4, %ymm5, %ymm6 {%k7} {z}
+
+// CHECK: vcvtne2ps2bf16 (%ecx), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x31]
+ vcvtne2ps2bf16 (%ecx), %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 291(%esp,%esi,8), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00]
+ vcvtne2ps2bf16 291(%esp,%esi,8), %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtne2ps2bf16 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 -32(%esp), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x74,0x24,0xff]
+ vcvtne2ps2bf16 -32(%esp), %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 (%eax){1to8}, %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x30]
+ vcvtne2ps2bf16 (%eax){1to8}, %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 4064(%edx), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 4064(%edx), %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 -4096(%edx), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x72,0x80]
+ vcvtne2ps2bf16 -4096(%edx), %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 508(%edx){1to8}, %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 508(%edx){1to8}, %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtne2ps2bf16 -512(%edx){1to8}, %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x72,0x80]
+ vcvtne2ps2bf16 -512(%edx){1to8}, %ymm5, %ymm6 {%k7}
+
+// CHECK: vcvtneps2bf16 %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x08,0x72,0xf5]
+ vcvtneps2bf16 %xmm5, %xmm6
+
+// CHECK: vcvtneps2bf16x 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x0f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtneps2bf16x 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtneps2bf16 (%ecx){1to4}, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x18,0x72,0x31]
+ vcvtneps2bf16 (%ecx){1to4}, %xmm6
+
+// CHECK: vcvtneps2bf16x 2032(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x08,0x72,0x71,0x7f]
+ vcvtneps2bf16x 2032(%ecx), %xmm6
+
+// CHECK: vcvtneps2bf16 -512(%edx){1to4}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x9f,0x72,0x72,0x80]
+ vcvtneps2bf16 -512(%edx){1to4}, %xmm6 {%k7} {z}
+
+// CHECK: vcvtneps2bf16 %ymm5, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x28,0x72,0xf5]
+ vcvtneps2bf16 %ymm5, %xmm6
+
+// CHECK: vcvtneps2bf16y 268435456(%esp,%esi,8), %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x2f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtneps2bf16y 268435456(%esp,%esi,8), %xmm6 {%k7}
+
+// CHECK: vcvtneps2bf16 (%ecx){1to8}, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x38,0x72,0x31]
+ vcvtneps2bf16 (%ecx){1to8}, %xmm6
+
+// CHECK: vcvtneps2bf16y 4064(%ecx), %xmm6
+// CHECK: encoding: [0x62,0xf2,0x7e,0x28,0x72,0x71,0x7f]
+ vcvtneps2bf16y 4064(%ecx), %xmm6
+
+// CHECK: vcvtneps2bf16 -512(%edx){1to8}, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x7e,0xbf,0x72,0x72,0x80]
+ vcvtneps2bf16 -512(%edx){1to8}, %xmm6 {%k7} {z}
+
+// CHECK: vdpbf16ps %ymm4, %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x28,0x52,0xf4]
+ vdpbf16ps %ymm4, %ymm5, %ymm6
+
+// CHECK: vdpbf16ps 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x56,0x2f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vdpbf16ps 268435456(%esp,%esi,8), %ymm5, %ymm6 {%k7}
+
+// CHECK: vdpbf16ps (%ecx){1to8}, %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x38,0x52,0x31]
+ vdpbf16ps (%ecx){1to8}, %ymm5, %ymm6
+
+// CHECK: vdpbf16ps 4064(%ecx), %ymm5, %ymm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x28,0x52,0x71,0x7f]
+ vdpbf16ps 4064(%ecx), %ymm5, %ymm6
+
+// CHECK: vdpbf16ps -512(%edx){1to8}, %ymm5, %ymm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x56,0xbf,0x52,0x72,0x80]
+ vdpbf16ps -512(%edx){1to8}, %ymm5, %ymm6 {%k7} {z}
+
+// CHECK: vdpbf16ps %xmm4, %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x08,0x52,0xf4]
+ vdpbf16ps %xmm4, %xmm5, %xmm6
+
+// CHECK: vdpbf16ps 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
+// CHECK: encoding: [0x62,0xf2,0x56,0x0f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vdpbf16ps 268435456(%esp,%esi,8), %xmm5, %xmm6 {%k7}
+
+// CHECK: vdpbf16ps (%ecx){1to4}, %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x18,0x52,0x31]
+ vdpbf16ps (%ecx){1to4}, %xmm5, %xmm6
+
+// CHECK: vdpbf16ps 2032(%ecx), %xmm5, %xmm6
+// CHECK: encoding: [0x62,0xf2,0x56,0x08,0x52,0x71,0x7f]
+ vdpbf16ps 2032(%ecx), %xmm5, %xmm6
+
+// CHECK: vdpbf16ps -512(%edx){1to4}, %xmm5, %xmm6 {%k7} {z}
+// CHECK: encoding: [0x62,0xf2,0x56,0x9f,0x52,0x72,0x80]
+ vdpbf16ps -512(%edx){1to4}, %xmm5, %xmm6 {%k7} {z}
+
diff --git a/test/MC/X86/intel-syntax-avx512_bf16.s b/test/MC/X86/intel-syntax-avx512_bf16.s
new file mode 100644
index 00000000000..3c9fca96deb
--- /dev/null
+++ b/test/MC/X86/intel-syntax-avx512_bf16.s
@@ -0,0 +1,90 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmm4
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xf4]
+ vcvtne2ps2bf16 zmm6, zmm5, zmm4
+
+// CHECK: vcvtne2ps2bf16 zmm6 {k7}, zmm5, zmm4
+// CHECK: encoding: [0x62,0xf2,0x57,0x4f,0x72,0xf4]
+ vcvtne2ps2bf16 zmm6 {k7}, zmm5, zmm4
+
+// CHECK: vcvtne2ps2bf16 zmm6 {k7} {z}, zmm5, zmm4
+// CHECK: encoding: [0x62,0xf2,0x57,0xcf,0x72,0xf4]
+ vcvtne2ps2bf16 zmm6 {k7} {z}, zmm5, zmm4
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [ecx]
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x31]
+ vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [ecx]
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp + 8*esi + 291]
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00]
+ vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp + 8*esi + 291]
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp - 4]
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff]
+ vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [esp - 4]
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, dword ptr [eax]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x30]
+ vcvtne2ps2bf16 zmm6, zmm5, dword ptr [eax]{1to16}
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [edx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [edx + 8128]
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x57,0x48,0x72,0x72,0x80]
+ vcvtne2ps2bf16 zmm6, zmm5, zmmword ptr [edx - 8192]
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, dword ptr [edx + 508]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 zmm6, zmm5, dword ptr [edx + 508]{1to16}
+
+// CHECK: vcvtne2ps2bf16 zmm6, zmm5, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x57,0x58,0x72,0x72,0x80]
+ vcvtne2ps2bf16 zmm6, zmm5, dword ptr [edx - 512]{1to16}
+
+// CHECK: vcvtneps2bf16 ymm6, zmm5
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x72,0xf5]
+ vcvtneps2bf16 ymm6, zmm5
+
+// CHECK: vcvtneps2bf16 ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x7e,0x4f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtneps2bf16 ymm6 {k7}, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtneps2bf16 ymm6, dword ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x58,0x72,0x31]
+ vcvtneps2bf16 ymm6, dword ptr [ecx]{1to16}
+
+// CHECK: vcvtneps2bf16 ymm6, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x7e,0x48,0x72,0x71,0x7f]
+ vcvtneps2bf16 ymm6, zmmword ptr [ecx + 8128]
+
+// CHECK: vcvtneps2bf16 ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x7e,0xdf,0x72,0x72,0x80]
+ vcvtneps2bf16 ymm6 {k7} {z}, dword ptr [edx - 512]{1to16}
+
+// CHECK: vdpbf16ps zmm6, zmm5, zmm4
+// CHECK: encoding: [0x62,0xf2,0x56,0x48,0x52,0xf4]
+ vdpbf16ps zmm6, zmm5, zmm4
+
+// CHECK: vdpbf16ps zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x56,0x4f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vdpbf16ps zmm6 {k7}, zmm5, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vdpbf16ps zmm6, zmm5, dword ptr [ecx]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x56,0x58,0x52,0x31]
+ vdpbf16ps zmm6, zmm5, dword ptr [ecx]{1to16}
+
+// CHECK: vdpbf16ps zmm6, zmm5, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x56,0x48,0x52,0x71,0x7f]
+ vdpbf16ps zmm6, zmm5, zmmword ptr [ecx + 8128]
+
+// CHECK: vdpbf16ps zmm6 {k7} {z}, zmm5, dword ptr [edx - 512]{1to16}
+// CHECK: encoding: [0x62,0xf2,0x56,0xdf,0x52,0x72,0x80]
+ vdpbf16ps zmm6 {k7} {z}, zmm5, dword ptr [edx - 512]{1to16}
+
diff --git a/test/MC/X86/intel-syntax-avx512_bf16_vl.s b/test/MC/X86/intel-syntax-avx512_bf16_vl.s
new file mode 100644
index 00000000000..d0310fd7028
--- /dev/null
+++ b/test/MC/X86/intel-syntax-avx512_bf16_vl.s
@@ -0,0 +1,170 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xf4]
+ vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmm4
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7} {z}, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf2,0x57,0x8f,0x72,0xf4]
+ vcvtne2ps2bf16 xmm6 {k7} {z}, xmm5, xmm4
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [ecx]
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x31]
+ vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [ecx]
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 291]
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00]
+ vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 291]
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp - 4]
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff]
+ vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [esp - 4]
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [eax]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x30]
+ vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [eax]{1to4}
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [edx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [edx + 2032]
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x57,0x0f,0x72,0x72,0x80]
+ vcvtne2ps2bf16 xmm6 {k7}, xmm5, xmmword ptr [edx - 2048]
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [edx + 508]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [edx + 508]{1to4}
+
+// CHECK: vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [edx - 512]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x57,0x1f,0x72,0x72,0x80]
+ vcvtne2ps2bf16 xmm6 {k7}, xmm5, dword ptr [edx - 512]{1to4}
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymm4
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xf4]
+ vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymm4
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7} {z}, ymm5, ymm4
+// CHECK: encoding: [0x62,0xf2,0x57,0xaf,0x72,0xf4]
+ vcvtne2ps2bf16 ymm6 {k7} {z}, ymm5, ymm4
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [ecx]
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x31]
+ vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [ecx]
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 291]
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0xf4,0x23,0x01,0x00,0x00]
+ vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 291]
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp - 4]
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff]
+ vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [esp - 4]
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [eax]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x30]
+ vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [eax]{1to8}
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [edx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [edx + 4064]
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x57,0x2f,0x72,0x72,0x80]
+ vcvtne2ps2bf16 ymm6 {k7}, ymm5, ymmword ptr [edx - 4096]
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [edx + 508]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [edx + 508]{1to8}
+
+// CHECK: vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [edx - 512]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x57,0x3f,0x72,0x72,0x80]
+ vcvtne2ps2bf16 ymm6 {k7}, ymm5, dword ptr [edx - 512]{1to8}
+
+// CHECK: vcvtneps2bf16 xmm6, xmm5
+// CHECK: encoding: [0x62,0xf2,0x7e,0x08,0x72,0xf5]
+ vcvtneps2bf16 xmm6, xmm5
+
+// CHECK: vcvtneps2bf16 xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x7e,0x0f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtneps2bf16 xmm6 {k7}, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtneps2bf16 xmm6, dword ptr [ecx]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x18,0x72,0x31]
+ vcvtneps2bf16 xmm6, dword ptr [ecx]{1to4}
+
+// CHECK: vcvtneps2bf16 xmm6, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x7e,0x08,0x72,0x71,0x7f]
+ vcvtneps2bf16 xmm6, xmmword ptr [ecx + 2032]
+
+// CHECK: vcvtneps2bf16 xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x9f,0x72,0x72,0x80]
+ vcvtneps2bf16 xmm6 {k7} {z}, dword ptr [edx - 512]{1to4}
+
+// CHECK: vcvtneps2bf16 xmm6, ymm5
+// CHECK: encoding: [0x62,0xf2,0x7e,0x28,0x72,0xf5]
+ vcvtneps2bf16 xmm6, ymm5
+
+// CHECK: vcvtneps2bf16 xmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x7e,0x2f,0x72,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vcvtneps2bf16 xmm6 {k7}, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vcvtneps2bf16 xmm6, dword ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x7e,0x38,0x72,0x31]
+ vcvtneps2bf16 xmm6, dword ptr [ecx]{1to8}
+
+// CHECK: vcvtneps2bf16 xmm6, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x7e,0x28,0x72,0x71,0x7f]
+ vcvtneps2bf16 xmm6, ymmword ptr [ecx + 4064]
+
+// CHECK: vcvtneps2bf16 xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x7e,0xbf,0x72,0x72,0x80]
+ vcvtneps2bf16 xmm6 {k7} {z}, dword ptr [edx - 512]{1to8}
+
+// CHECK: vdpbf16ps ymm6, ymm5, ymm4
+// CHECK: encoding: [0x62,0xf2,0x56,0x28,0x52,0xf4]
+ vdpbf16ps ymm6, ymm5, ymm4
+
+// CHECK: vdpbf16ps ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x56,0x2f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vdpbf16ps ymm6 {k7}, ymm5, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vdpbf16ps ymm6, ymm5, dword ptr [ecx]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x56,0x38,0x52,0x31]
+ vdpbf16ps ymm6, ymm5, dword ptr [ecx]{1to8}
+
+// CHECK: vdpbf16ps ymm6, ymm5, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x56,0x28,0x52,0x71,0x7f]
+ vdpbf16ps ymm6, ymm5, ymmword ptr [ecx + 4064]
+
+// CHECK: vdpbf16ps ymm6 {k7} {z}, ymm5, dword ptr [edx - 512]{1to8}
+// CHECK: encoding: [0x62,0xf2,0x56,0xbf,0x52,0x72,0x80]
+ vdpbf16ps ymm6 {k7} {z}, ymm5, dword ptr [edx - 512]{1to8}
+
+// CHECK: vdpbf16ps xmm6, xmm5, xmm4
+// CHECK: encoding: [0x62,0xf2,0x56,0x08,0x52,0xf4]
+ vdpbf16ps xmm6, xmm5, xmm4
+
+// CHECK: vdpbf16ps xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x56,0x0f,0x52,0xb4,0xf4,0x00,0x00,0x00,0x10]
+ vdpbf16ps xmm6 {k7}, xmm5, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vdpbf16ps xmm6, xmm5, dword ptr [ecx]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x56,0x18,0x52,0x31]
+ vdpbf16ps xmm6, xmm5, dword ptr [ecx]{1to4}
+
+// CHECK: vdpbf16ps xmm6, xmm5, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x56,0x08,0x52,0x71,0x7f]
+ vdpbf16ps xmm6, xmm5, xmmword ptr [ecx + 2032]
+
+// CHECK: vdpbf16ps xmm6 {k7} {z}, xmm5, dword ptr [edx - 512]{1to4}
+// CHECK: encoding: [0x62,0xf2,0x56,0x9f,0x52,0x72,0x80]
+ vdpbf16ps xmm6 {k7} {z}, xmm5, dword ptr [edx - 512]{1to4}
+
diff --git a/test/MC/X86/intel-syntax-x86-64-avx512_bf16.s b/test/MC/X86/intel-syntax-x86-64-avx512_bf16.s
new file mode 100644
index 00000000000..faaeaa7321d
--- /dev/null
+++ b/test/MC/X86/intel-syntax-x86-64-avx512_bf16.s
@@ -0,0 +1,90 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmm28
+// CHECK: encoding: [0x62,0x02,0x17,0x40,0x72,0xf4]
+ vcvtne2ps2bf16 zmm30, zmm29, zmm28
+
+// CHECK: vcvtne2ps2bf16 zmm30 {k7}, zmm29, zmm28
+// CHECK: encoding: [0x62,0x02,0x17,0x47,0x72,0xf4]
+ vcvtne2ps2bf16 zmm30 {k7}, zmm29, zmm28
+
+// CHECK: vcvtne2ps2bf16 zmm30 {k7} {z}, zmm29, zmm28
+// CHECK: encoding: [0x62,0x02,0x17,0xc7,0x72,0xf4]
+ vcvtne2ps2bf16 zmm30 {k7} {z}, zmm29, zmm28
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rcx]
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x31]
+ vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rcx]
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rax + 8*r14 + 291]
+// CHECK: encoding: [0x62,0x22,0x17,0x40,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rax + 8*r14 + 291]
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rax + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x17,0x40,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rax + 8*r14 + 268435456]
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rsp - 4]
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff]
+ vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rsp - 4]
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rcx]{1to16}
+// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x31]
+ vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rcx]{1to16}
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rdx + 8128]
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rdx + 8128]
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x72,0x80]
+ vcvtne2ps2bf16 zmm30, zmm29, zmmword ptr [rdx - 8192]
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rdx + 508]{1to16}
+// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rdx + 508]{1to16}
+
+// CHECK: vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rdx - 512]{1to16}
+// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x72,0x80]
+ vcvtne2ps2bf16 zmm30, zmm29, dword ptr [rdx - 512]{1to16}
+
+// CHECK: vcvtneps2bf16 ymm30, zmm29
+// CHECK: encoding: [0x62,0x02,0x7e,0x48,0x72,0xf5]
+ vcvtneps2bf16 ymm30, zmm29
+
+// CHECK: vcvtneps2bf16 ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x7e,0x4f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtneps2bf16 ymm30 {k7}, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtneps2bf16 ymm30, dword ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x42,0x7e,0x58,0x72,0x31]
+ vcvtneps2bf16 ymm30, dword ptr [r9]{1to16}
+
+// CHECK: vcvtneps2bf16 ymm30, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x72,0x71,0x7f]
+ vcvtneps2bf16 ymm30, zmmword ptr [rcx + 8128]
+
+// CHECK: vcvtneps2bf16 ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
+// CHECK: encoding: [0x62,0x62,0x7e,0xdf,0x72,0x72,0x80]
+ vcvtneps2bf16 ymm30 {k7} {z}, dword ptr [rdx - 512]{1to16}
+
+// CHECK: vdpbf16ps zmm30, zmm29, zmm28
+// CHECK: encoding: [0x62,0x02,0x16,0x40,0x52,0xf4]
+ vdpbf16ps zmm30, zmm29, zmm28
+
+// CHECK: vdpbf16ps zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x16,0x47,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdpbf16ps zmm30 {k7}, zmm29, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vdpbf16ps zmm30, zmm29, dword ptr [r9]{1to16}
+// CHECK: encoding: [0x62,0x42,0x16,0x50,0x52,0x31]
+ vdpbf16ps zmm30, zmm29, dword ptr [r9]{1to16}
+
+// CHECK: vdpbf16ps zmm30, zmm29, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0x62,0x16,0x40,0x52,0x71,0x7f]
+ vdpbf16ps zmm30, zmm29, zmmword ptr [rcx + 8128]
+
+// CHECK: vdpbf16ps zmm30 {k7} {z}, zmm29, dword ptr [rdx - 512]{1to16}
+// CHECK: encoding: [0x62,0x62,0x16,0xd7,0x52,0x72,0x80]
+ vdpbf16ps zmm30 {k7} {z}, zmm29, dword ptr [rdx - 512]{1to16}
+
diff --git a/test/MC/X86/intel-syntax-x86-64-avx512_bf16_vl.s b/test/MC/X86/intel-syntax-x86-64-avx512_bf16_vl.s
new file mode 100644
index 00000000000..e8c531175e1
--- /dev/null
+++ b/test/MC/X86/intel-syntax-x86-64-avx512_bf16_vl.s
@@ -0,0 +1,178 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmm28
+// CHECK: encoding: [0x62,0x02,0x17,0x00,0x72,0xf4]
+ vcvtne2ps2bf16 xmm30, xmm29, xmm28
+
+// CHECK: vcvtne2ps2bf16 xmm30 {k7}, xmm29, xmm28
+// CHECK: encoding: [0x62,0x02,0x17,0x07,0x72,0xf4]
+ vcvtne2ps2bf16 xmm30 {k7}, xmm29, xmm28
+
+// CHECK: vcvtne2ps2bf16 xmm30 {k7} {z}, xmm29, xmm28
+// CHECK: encoding: [0x62,0x02,0x17,0x87,0x72,0xf4]
+ vcvtne2ps2bf16 xmm30 {k7} {z}, xmm29, xmm28
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rcx]
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x31]
+ vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rcx]
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rax + 8*r14 + 291]
+// CHECK: encoding: [0x62,0x22,0x17,0x00,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rax + 8*r14 + 291]
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rax + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x17,0x00,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rax + 8*r14 + 268435456]
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rsp - 4]
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff]
+ vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rsp - 4]
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rcx]{1to4}
+// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x31]
+ vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rcx]{1to4}
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rdx + 2032]
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rdx + 2032]
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x72,0x80]
+ vcvtne2ps2bf16 xmm30, xmm29, xmmword ptr [rdx - 2048]
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rdx + 508]{1to4}
+// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rdx + 508]{1to4}
+
+// CHECK: vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x72,0x80]
+ vcvtne2ps2bf16 xmm30, xmm29, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymm28
+// CHECK: encoding: [0x62,0x02,0x17,0x20,0x72,0xf4]
+ vcvtne2ps2bf16 ymm30, ymm29, ymm28
+
+// CHECK: vcvtne2ps2bf16 ymm30 {k7}, ymm29, ymm28
+// CHECK: encoding: [0x62,0x02,0x17,0x27,0x72,0xf4]
+ vcvtne2ps2bf16 ymm30 {k7}, ymm29, ymm28
+
+// CHECK: vcvtne2ps2bf16 ymm30 {k7} {z}, ymm29, ymm28
+// CHECK: encoding: [0x62,0x02,0x17,0xa7,0x72,0xf4]
+ vcvtne2ps2bf16 ymm30 {k7} {z}, ymm29, ymm28
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rcx]
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x31]
+ vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rcx]
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rax + 8*r14 + 291]
+// CHECK: encoding: [0x62,0x22,0x17,0x20,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rax + 8*r14 + 291]
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rax + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x17,0x20,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rax + 8*r14 + 268435456]
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rsp - 4]
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0xb4,0x24,0xfc,0xff,0xff,0xff]
+ vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rsp - 4]
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rcx]{1to8}
+// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x31]
+ vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rcx]{1to8}
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rdx + 4064]
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rdx + 4064]
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x72,0x80]
+ vcvtne2ps2bf16 ymm30, ymm29, ymmword ptr [rdx - 4096]
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rdx + 508]{1to8}
+// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rdx + 508]{1to8}
+
+// CHECK: vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x72,0x80]
+ vcvtne2ps2bf16 ymm30, ymm29, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vcvtneps2bf16 xmm30, xmm29
+// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x72,0xf5]
+ vcvtneps2bf16 xmm30, xmm29
+
+// CHECK: vcvtneps2bf16 xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x7e,0x0f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtneps2bf16 xmm30 {k7}, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtneps2bf16 xmm30, dword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x42,0x7e,0x18,0x72,0x31]
+ vcvtneps2bf16 xmm30, dword ptr [r9]{1to4}
+
+// CHECK: vcvtneps2bf16 xmm30, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x72,0x71,0x7f]
+ vcvtneps2bf16 xmm30, xmmword ptr [rcx + 2032]
+
+// CHECK: vcvtneps2bf16 xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0x62,0x7e,0x9f,0x72,0x72,0x80]
+ vcvtneps2bf16 xmm30 {k7} {z}, dword ptr [rdx - 512]{1to4}
+
+// CHECK: vcvtneps2bf16 xmm30, ymm29
+// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x72,0xf5]
+ vcvtneps2bf16 xmm30, ymm29
+
+// CHECK: vcvtneps2bf16 xmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x7e,0x2f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtneps2bf16 xmm30 {k7}, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vcvtneps2bf16 xmm30, dword ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x42,0x7e,0x38,0x72,0x31]
+ vcvtneps2bf16 xmm30, dword ptr [r9]{1to8}
+
+// CHECK: vcvtneps2bf16 xmm30, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x72,0x71,0x7f]
+ vcvtneps2bf16 xmm30, ymmword ptr [rcx + 4064]
+
+// CHECK: vcvtneps2bf16 xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0x62,0x7e,0xbf,0x72,0x72,0x80]
+ vcvtneps2bf16 xmm30 {k7} {z}, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vdpbf16ps ymm30, ymm29, ymm28
+// CHECK: encoding: [0x62,0x02,0x16,0x20,0x52,0xf4]
+ vdpbf16ps ymm30, ymm29, ymm28
+
+// CHECK: vdpbf16ps ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x16,0x27,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdpbf16ps ymm30 {k7}, ymm29, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vdpbf16ps ymm30, ymm29, dword ptr [r9]{1to8}
+// CHECK: encoding: [0x62,0x42,0x16,0x30,0x52,0x31]
+ vdpbf16ps ymm30, ymm29, dword ptr [r9]{1to8}
+
+// CHECK: vdpbf16ps ymm30, ymm29, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0x62,0x16,0x20,0x52,0x71,0x7f]
+ vdpbf16ps ymm30, ymm29, ymmword ptr [rcx + 4064]
+
+// CHECK: vdpbf16ps ymm30 {k7} {z}, ymm29, dword ptr [rdx - 512]{1to8}
+// CHECK: encoding: [0x62,0x62,0x16,0xb7,0x52,0x72,0x80]
+ vdpbf16ps ymm30 {k7} {z}, ymm29, dword ptr [rdx - 512]{1to8}
+
+// CHECK: vdpbf16ps xmm30, xmm29, xmm28
+// CHECK: encoding: [0x62,0x02,0x16,0x00,0x52,0xf4]
+ vdpbf16ps xmm30, xmm29, xmm28
+
+// CHECK: vdpbf16ps xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0x22,0x16,0x07,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdpbf16ps xmm30 {k7}, xmm29, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vdpbf16ps xmm30, xmm29, dword ptr [r9]{1to4}
+// CHECK: encoding: [0x62,0x42,0x16,0x10,0x52,0x31]
+ vdpbf16ps xmm30, xmm29, dword ptr [r9]{1to4}
+
+// CHECK: vdpbf16ps xmm30, xmm29, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0x62,0x16,0x00,0x52,0x71,0x7f]
+ vdpbf16ps xmm30, xmm29, xmmword ptr [rcx + 2032]
+
+// CHECK: vdpbf16ps xmm30 {k7} {z}, xmm29, dword ptr [rdx - 512]{1to4}
+// CHECK: encoding: [0x62,0x62,0x16,0x97,0x52,0x72,0x80]
+ vdpbf16ps xmm30 {k7} {z}, xmm29, dword ptr [rdx - 512]{1to4}
+
diff --git a/test/MC/X86/x86-64-avx512_bf16-encoding.s b/test/MC/X86/x86-64-avx512_bf16-encoding.s
new file mode 100644
index 00000000000..dcd8f79fcb1
--- /dev/null
+++ b/test/MC/X86/x86-64-avx512_bf16-encoding.s
@@ -0,0 +1,90 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x02,0x17,0x40,0x72,0xf4]
+ vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x02,0x17,0x47,0x72,0xf4]
+ vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0x17,0xc7,0x72,0xf4]
+ vcvtne2ps2bf16 %zmm28, %zmm29, %zmm30 {%k7} {z}
+
+// CHECK: vcvtne2ps2bf16 (%rcx), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x31]
+ vcvtne2ps2bf16 (%rcx), %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16 291(%rax,%r14,8), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x22,0x17,0x40,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vcvtne2ps2bf16 291(%rax,%r14,8), %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16 268435456(%rax,%r14,8), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x22,0x17,0x40,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ vcvtne2ps2bf16 268435456(%rax,%r14,8), %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16 -64(%rsp), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x74,0x24,0xff]
+ vcvtne2ps2bf16 -64(%rsp), %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16 (%rcx){1to16}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x31]
+ vcvtne2ps2bf16 (%rcx){1to16}, %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16 8128(%rdx), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 8128(%rdx), %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16 -8192(%rdx), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x40,0x72,0x72,0x80]
+ vcvtne2ps2bf16 -8192(%rdx), %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16 508(%rdx){1to16}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 508(%rdx){1to16}, %zmm29, %zmm30
+
+// CHECK: vcvtne2ps2bf16 -512(%rdx){1to16}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x50,0x72,0x72,0x80]
+ vcvtne2ps2bf16 -512(%rdx){1to16}, %zmm29, %zmm30
+
+// CHECK: vcvtneps2bf16 %zmm29, %ymm30
+// CHECK: encoding: [0x62,0x02,0x7e,0x48,0x72,0xf5]
+ vcvtneps2bf16 %zmm29, %ymm30
+
+// CHECK: vcvtneps2bf16 268435456(%rbp,%r14,8), %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7e,0x4f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtneps2bf16 268435456(%rbp,%r14,8), %ymm30 {%k7}
+
+// CHECK: vcvtneps2bf16 (%r9){1to16}, %ymm30
+// CHECK: encoding: [0x62,0x42,0x7e,0x58,0x72,0x31]
+ vcvtneps2bf16 (%r9){1to16}, %ymm30
+
+// CHECK: vcvtneps2bf16 8128(%rcx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7e,0x48,0x72,0x71,0x7f]
+ vcvtneps2bf16 8128(%rcx), %ymm30
+
+// CHECK: vcvtneps2bf16 -512(%rdx){1to16}, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0x7e,0xdf,0x72,0x72,0x80]
+ vcvtneps2bf16 -512(%rdx){1to16}, %ymm30 {%k7} {z}
+
+// CHECK: vdpbf16ps %zmm28, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x02,0x16,0x40,0x52,0xf4]
+ vdpbf16ps %zmm28, %zmm29, %zmm30
+
+// CHECK: vdpbf16ps 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x16,0x47,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdpbf16ps 268435456(%rbp,%r14,8), %zmm29, %zmm30 {%k7}
+
+// CHECK: vdpbf16ps (%r9){1to16}, %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x42,0x16,0x50,0x52,0x31]
+ vdpbf16ps (%r9){1to16}, %zmm29, %zmm30
+
+// CHECK: vdpbf16ps 8128(%rcx), %zmm29, %zmm30
+// CHECK: encoding: [0x62,0x62,0x16,0x40,0x52,0x71,0x7f]
+ vdpbf16ps 8128(%rcx), %zmm29, %zmm30
+
+// CHECK: vdpbf16ps -512(%rdx){1to16}, %zmm29, %zmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0x16,0xd7,0x52,0x72,0x80]
+ vdpbf16ps -512(%rdx){1to16}, %zmm29, %zmm30 {%k7} {z}
+
diff --git a/test/MC/X86/x86-64-avx512_bf16_vl-encoding.s b/test/MC/X86/x86-64-avx512_bf16_vl-encoding.s
new file mode 100644
index 00000000000..041a69008af
--- /dev/null
+++ b/test/MC/X86/x86-64-avx512_bf16_vl-encoding.s
@@ -0,0 +1,178 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s | FileCheck %s
+
+// CHECK: vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x02,0x17,0x00,0x72,0xf4]
+ vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x02,0x17,0x07,0x72,0xf4]
+ vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0x17,0x87,0x72,0xf4]
+ vcvtne2ps2bf16 %xmm28, %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vcvtne2ps2bf16 (%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x31]
+ vcvtne2ps2bf16 (%rcx), %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16 291(%rax,%r14,8), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x22,0x17,0x00,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vcvtne2ps2bf16 291(%rax,%r14,8), %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16 268435456(%rax,%r14,8), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x22,0x17,0x00,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ vcvtne2ps2bf16 268435456(%rax,%r14,8), %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16 -16(%rsp), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x74,0x24,0xff]
+ vcvtne2ps2bf16 -16(%rsp), %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16 (%rcx){1to4}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x31]
+ vcvtne2ps2bf16 (%rcx){1to4}, %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16 2032(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 2032(%rdx), %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16 -2048(%rdx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x00,0x72,0x72,0x80]
+ vcvtne2ps2bf16 -2048(%rdx), %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16 508(%rdx){1to4}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 508(%rdx){1to4}, %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16 -512(%rdx){1to4}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x17,0x10,0x72,0x72,0x80]
+ vcvtne2ps2bf16 -512(%rdx){1to4}, %xmm29, %xmm30
+
+// CHECK: vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x02,0x17,0x20,0x72,0xf4]
+ vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x02,0x17,0x27,0x72,0xf4]
+ vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 {%k7}
+
+// CHECK: vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0x17,0xa7,0x72,0xf4]
+ vcvtne2ps2bf16 %ymm28, %ymm29, %ymm30 {%k7} {z}
+
+// CHECK: vcvtne2ps2bf16 (%rcx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x31]
+ vcvtne2ps2bf16 (%rcx), %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16 291(%rax,%r14,8), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x22,0x17,0x20,0x72,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vcvtne2ps2bf16 291(%rax,%r14,8), %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16 268435456(%rax,%r14,8), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x22,0x17,0x20,0x72,0xb4,0xf0,0x00,0x00,0x00,0x10]
+ vcvtne2ps2bf16 268435456(%rax,%r14,8), %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16 -32(%rsp), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x74,0x24,0xff]
+ vcvtne2ps2bf16 -32(%rsp), %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16 (%rcx){1to8}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x31]
+ vcvtne2ps2bf16 (%rcx){1to8}, %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16 4064(%rdx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 4064(%rdx), %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16 -4096(%rdx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x20,0x72,0x72,0x80]
+ vcvtne2ps2bf16 -4096(%rdx), %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16 508(%rdx){1to8}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x72,0x7f]
+ vcvtne2ps2bf16 508(%rdx){1to8}, %ymm29, %ymm30
+
+// CHECK: vcvtne2ps2bf16 -512(%rdx){1to8}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x17,0x30,0x72,0x72,0x80]
+ vcvtne2ps2bf16 -512(%rdx){1to8}, %ymm29, %ymm30
+
+// CHECK: vcvtneps2bf16 %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7e,0x08,0x72,0xf5]
+ vcvtneps2bf16 %xmm29, %xmm30
+
+// CHECK: vcvtneps2bf16x 268435456(%rbp,%r14,8), %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7e,0x0f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtneps2bf16x 268435456(%rbp,%r14,8), %xmm30 {%k7}
+
+// CHECK: vcvtneps2bf16 (%r9){1to4}, %xmm30
+// CHECK: encoding: [0x62,0x42,0x7e,0x18,0x72,0x31]
+ vcvtneps2bf16 (%r9){1to4}, %xmm30
+
+// CHECK: vcvtneps2bf16x 2032(%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7e,0x08,0x72,0x71,0x7f]
+ vcvtneps2bf16x 2032(%rcx), %xmm30
+
+// CHECK: vcvtneps2bf16 -512(%rdx){1to4}, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0x7e,0x9f,0x72,0x72,0x80]
+ vcvtneps2bf16 -512(%rdx){1to4}, %xmm30 {%k7} {z}
+
+// CHECK: vcvtneps2bf16 %ymm29, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7e,0x28,0x72,0xf5]
+ vcvtneps2bf16 %ymm29, %xmm30
+
+// CHECK: vcvtneps2bf16y 268435456(%rbp,%r14,8), %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7e,0x2f,0x72,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vcvtneps2bf16y 268435456(%rbp,%r14,8), %xmm30 {%k7}
+
+// CHECK: vcvtneps2bf16 (%r9){1to8}, %xmm30
+// CHECK: encoding: [0x62,0x42,0x7e,0x38,0x72,0x31]
+ vcvtneps2bf16 (%r9){1to8}, %xmm30
+
+// CHECK: vcvtneps2bf16y 4064(%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7e,0x28,0x72,0x71,0x7f]
+ vcvtneps2bf16y 4064(%rcx), %xmm30
+
+// CHECK: vcvtneps2bf16 -512(%rdx){1to8}, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0x7e,0xbf,0x72,0x72,0x80]
+ vcvtneps2bf16 -512(%rdx){1to8}, %xmm30 {%k7} {z}
+
+// CHECK: vdpbf16ps %ymm28, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x02,0x16,0x20,0x52,0xf4]
+ vdpbf16ps %ymm28, %ymm29, %ymm30
+
+// CHECK: vdpbf16ps 268435456(%rbp,%r14,8), %ymm29, %ymm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x16,0x27,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdpbf16ps 268435456(%rbp,%r14,8), %ymm29, %ymm30 {%k7}
+
+// CHECK: vdpbf16ps (%r9){1to8}, %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x42,0x16,0x30,0x52,0x31]
+ vdpbf16ps (%r9){1to8}, %ymm29, %ymm30
+
+// CHECK: vdpbf16ps 4064(%rcx), %ymm29, %ymm30
+// CHECK: encoding: [0x62,0x62,0x16,0x20,0x52,0x71,0x7f]
+ vdpbf16ps 4064(%rcx), %ymm29, %ymm30
+
+// CHECK: vdpbf16ps -512(%rdx){1to8}, %ymm29, %ymm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0x16,0xb7,0x52,0x72,0x80]
+ vdpbf16ps -512(%rdx){1to8}, %ymm29, %ymm30 {%k7} {z}
+
+// CHECK: vdpbf16ps %xmm28, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x02,0x16,0x00,0x52,0xf4]
+ vdpbf16ps %xmm28, %xmm29, %xmm30
+
+// CHECK: vdpbf16ps 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x22,0x16,0x07,0x52,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vdpbf16ps 268435456(%rbp,%r14,8), %xmm29, %xmm30 {%k7}
+
+// CHECK: vdpbf16ps (%r9){1to4}, %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x42,0x16,0x10,0x52,0x31]
+ vdpbf16ps (%r9){1to4}, %xmm29, %xmm30
+
+// CHECK: vdpbf16ps 2032(%rcx), %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x62,0x16,0x00,0x52,0x71,0x7f]
+ vdpbf16ps 2032(%rcx), %xmm29, %xmm30
+
+// CHECK: vdpbf16ps -512(%rdx){1to4}, %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0x16,0x97,0x52,0x72,0x80]
+ vdpbf16ps -512(%rdx){1to4}, %xmm29, %xmm30 {%k7} {z}
+
OpenPOWER on IntegriCloud