summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author Igor Breger <igor.breger@intel.com> 2015-10-20 11:56:42 +0000
committer Igor Breger <igor.breger@intel.com> 2015-10-20 11:56:42 +0000
commit 21296d230ac53d16cab8daf5395afdd803f4f277 (patch)
tree 69c5dcdd4256c6a08948dcf2e2ebac1947b31c59 /llvm
parent 4a8c93ee85932ef8b692cf6d86f0c6c6ae3cfa0d (diff)
download bcm5719-llvm-21296d230ac53d16cab8daf5395afdd803f4f277.tar.gz
bcm5719-llvm-21296d230ac53d16cab8daf5395afdd803f4f277.zip
AVX512: Implemented encoding and intrinsics for VPBROADCASTB/W/D/Q instructions.
Differential Revision: http://reviews.llvm.org/D13884
llvm-svn: 250819
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/include/llvm/IR/IntrinsicsX86.td 53
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td 137
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td 54
-rw-r--r-- llvm/lib/Target/X86/X86IntrinsicsInfo.h 24
-rw-r--r-- llvm/test/CodeGen/X86/avx-isa-check.ll 21
-rw-r--r-- llvm/test/CodeGen/X86/avx512-intrinsics.ll 45
-rw-r--r-- llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll 121
-rw-r--r-- llvm/test/CodeGen/X86/avx512vl-intrinsics.ll 85
-rw-r--r-- llvm/test/MC/X86/avx512-encodings.s 124
-rw-r--r-- llvm/test/MC/X86/x86-64-avx512bw.s 96
-rw-r--r-- llvm/test/MC/X86/x86-64-avx512bw_vl.s 385
-rw-r--r-- llvm/test/MC/X86/x86-64-avx512f_vl.s 248
12 files changed, 1272 insertions, 121 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 2bdca6d3b4f..d9c95f25ba5 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -4825,15 +4825,58 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd512">,
Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastb_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb128_mask">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastb_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb256_mask">,
+ Intrinsic<[llvm_v32i8_ty],
+ [llvm_v16i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastb_512 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb512_mask">,
+ Intrinsic<[llvm_v64i8_ty],
+ [llvm_v16i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastw_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastw_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw256_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastw_512 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw512_mask">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastd_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastd128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastd_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastd256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_pbroadcastd_512 :
- GCCBuiltin<"__builtin_ia32_pbroadcastd512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ GCCBuiltin<"__builtin_ia32_pbroadcastd512">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastq_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq128_mask">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastq_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pbroadcastq_512 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq512">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
def int_x86_avx512_pbroadcastd_i32_512 :
Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_pbroadcastq_512 :
- GCCBuiltin<"__builtin_ia32_pbroadcastq512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_x86_avx512_pbroadcastq_i64_512 :
Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>;
}
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index a5a904873e5..9f57958b2da 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -807,46 +807,45 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs),
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
//---
-multiclass avx512_fp_broadcast<bits<8> opc, SDNode OpNode, RegisterClass SrcRC,
- ValueType svt, X86VectorVTInfo _> {
- defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins SrcRC:$src), "vbroadcast"## !subst("p", "s", _.Suffix),
- "$src", "$src", (_.VT (OpNode (svt SrcRC:$src)))>,
- T8PD, EVEX;
- let mayLoad = 1 in {
- defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.ScalarMemOp:$src),
- "vbroadcast"##!subst("p", "s", _.Suffix), "$src", "$src",
- (_.VT (OpNode (_.ScalarLdFrag addr:$src)))>,
- T8PD, EVEX;
- }
+multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
+
+ defm r : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src",
+ (DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))>,
+ T8PD, EVEX;
+ let mayLoad = 1 in
+ defm m : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src",
+ (DestInfo.VT (X86VBroadcast
+ (SrcInfo.ScalarLdFrag addr:$src)))>,
+ T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>;
}
-multiclass avx512_fp_broadcast_vl<bits<8> opc, SDNode OpNode,
- AVX512VLVectorVTInfo _> {
- defm Z : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info512>,
+multiclass avx512_fp_broadcast_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _> {
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
EVEX_V512;
let Predicates = [HasVLX] in {
- defm Z256 : avx512_fp_broadcast<opc, OpNode, VR128X, _.info128.VT, _.info256>,
- EVEX_V256;
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
+ EVEX_V256;
}
}
let ExeDomain = SSEPackedSingle in {
- defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast,
- avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>;
+ defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, "vbroadcastss",
+ avx512vl_f32_info>;
let Predicates = [HasVLX] in {
- defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X,
- v4f32, v4f32x_info>, EVEX_V128,
- EVEX_CD8<32, CD8VT1>;
+ defm VBROADCASTSSZ128 : avx512_broadcast_rm<0x18, "vbroadcastss",
+ v4f32x_info, v4f32x_info>, EVEX_V128;
}
}
let ExeDomain = SSEPackedDouble in {
- defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast,
- avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, "vbroadcastsd",
+ avx512vl_f64_info>, VEX_W;
}
// avx512_broadcast_pat introduces patterns for broadcast with a scalar argument.
@@ -947,50 +946,41 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src),
(bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))),
(VPBROADCASTQrZrkz (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>;
-multiclass avx512_int_broadcast_rm<bits<8> opc, string OpcodeStr,
- X86MemOperand x86memop, PatFrag ld_frag,
- RegisterClass DstRC, ValueType OpVT, ValueType SrcVT,
- RegisterClass KRC> {
- def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins VR128X:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
- def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
- VR128X:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} |${dst} {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
- VR128X:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
- let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
- def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
- x86memop:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}}|${dst} {${mask}} , $src}"),
- []>, EVEX, EVEX_K;
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
- x86memop:$src),
- !strconcat(OpcodeStr,
- "\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [(set DstRC:$dst, (OpVT (vselect KRC:$mask,
- (X86VBroadcast (ld_frag addr:$src)),
- (OpVT (bitconvert (v16i32 immAllZerosV))))))]>, EVEX, EVEX_KZ;
+// Provide aliases for broadcast from the same register class that
+// automatically do the extract.
+multiclass avx512_int_broadcast_rm_lowering<X86VectorVTInfo DestInfo,
+ X86VectorVTInfo SrcInfo> {
+ def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))),
+ (!cast<Instruction>(NAME#DestInfo.ZSuffix#"r")
+ (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>;
+}
+
+multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _, Predicate prd> {
+ let Predicates = [prd] in {
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
+ avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
+ EVEX_V512;
+ // Defined separately to avoid redefinition.
+ defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
+ }
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
+ avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
+ EVEX_V256;
+ defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
+ EVEX_V128;
}
}
-defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem,
- loadi32, VR512, v16i32, v4i32, VK16WM>,
- EVEX_V512, EVEX_CD8<32, CD8VT1>;
-defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
- loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
- EVEX_CD8<64, CD8VT1>;
+defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb",
+ avx512vl_i8_info, HasBWI>;
+defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw",
+ avx512vl_i16_info, HasBWI>;
+defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd",
+ avx512vl_i32_info, HasAVX512>;
+defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq",
+ avx512vl_i64_info, HasAVX512>, VEX_W;
multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
@@ -1057,11 +1047,6 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
-def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
- (VPBROADCASTDZrr VR128X:$src)>;
-def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
- (VPBROADCASTQZrr VR128X:$src)>;
-
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
(VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))),
@@ -1072,16 +1057,6 @@ def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))),
def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))),
(VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>;
-def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))),
- (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>;
-def : Pat<(v16i32 (X86VBroadcast (v8i32 VR256X:$src))),
- (VPBROADCASTDZrr (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm))>;
-
-def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))),
- (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>;
-def : Pat<(v8i64 (X86VBroadcast (v4i64 VR256X:$src))),
- (VPBROADCASTQZrr (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm))>;
-
def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))),
(VBROADCASTSSZr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index dac9e35f4b8..17edb500d66 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8318,29 +8318,45 @@ defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32,
//
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, PatFrag ld_frag,
- ValueType OpVT128, ValueType OpVT256> {
- def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ ValueType OpVT128, ValueType OpVT256, Predicate prd> {
+ let Predicates = [HasAVX2, prd] in {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
+ [(set VR128:$dst,
+ (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>,
Sched<[WriteShuffle]>, VEX;
- def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,
+ [(set VR128:$dst,
+ (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,
Sched<[WriteLoad]>, VEX;
- def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
+ [(set VR256:$dst,
+ (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>,
Sched<[WriteShuffle256]>, VEX, VEX_L;
- def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,
+ [(set VR256:$dst,
+ (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,
Sched<[WriteLoad]>, VEX, VEX_L;
+
+ // Provide aliases for broadcast from the same register class that
+ // automatically do the extract.
+ def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))),
+ (!cast<Instruction>(NAME#"Yrr")
+ (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src),sub_xmm)))>;
+ }
}
-defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, v16i8, v32i8>;
-defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16, v8i16, v16i16>;
-defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, v4i32, v8i32>;
-defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, v2i64, v4i64>;
+defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
+ v16i8, v32i8, NoVLX_Or_NoBWI>;
+defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
+ v8i16, v16i16, NoVLX_Or_NoBWI>;
+defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
+ v4i32, v8i32, NoVLX>;
+defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
+ v2i64, v4i64, NoVLX>;
let Predicates = [HasAVX2] in {
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
@@ -8352,18 +8368,6 @@ let Predicates = [HasAVX2] in {
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
- def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
- (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src),
- sub_xmm)))>;
- def : Pat<(v16i16 (X86VBroadcast (v16i16 VR256:$src))),
- (VPBROADCASTWYrr (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src),
- sub_xmm)))>;
- def : Pat<(v8i32 (X86VBroadcast (v8i32 VR256:$src))),
- (VPBROADCASTDYrr (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src),
- sub_xmm)))>;
- def : Pat<(v4i64 (X86VBroadcast (v4i64 VR256:$src))),
- (VPBROADCASTQYrr (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src),
- sub_xmm)))>;
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
(VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src),
sub_xmm)))>;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 6e5ca8de555..5a2aa5b51a3 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1564,6 +1564,30 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ,
X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastb_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastb_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastb_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastd_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastd_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastd_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
+ X86_INTRINSIC_DATA(avx512_pbroadcastw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VBROADCAST, 0),
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
X86_INTRINSIC_DATA(avx512_psll_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSHLDQ, 0),
X86_INTRINSIC_DATA(avx512_psrl_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSRLDQ, 0),
diff --git a/llvm/test/CodeGen/X86/avx-isa-check.ll b/llvm/test/CodeGen/X86/avx-isa-check.ll
index 071891c148f..02b4f37f96a 100644
--- a/llvm/test/CodeGen/X86/avx-isa-check.ll
+++ b/llvm/test/CodeGen/X86/avx-isa-check.ll
@@ -323,3 +323,24 @@ define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
%r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
ret <16 x i8> %r2
}
+
+define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
+ ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
+ ret <4 x i32> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
+ %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+ %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ ret <16 x i16> %shuffle
+}
+
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 7b3b62b3615..e7b51434ebd 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -365,12 +365,24 @@ define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
-define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
- ; CHECK: vpbroadcastd
- %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
- ret <16 x i32> %res
+define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpbroadcastd %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpbroadcastd %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
+ %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
+ %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res2, %res3
+ ret <16 x i32> %res4
}
-declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly
+declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)
define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
; CHECK: vpbroadcastd
@@ -379,12 +391,25 @@ define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly
-define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
- ; CHECK: vpbroadcastq
- %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
- ret <8 x i64> %res
+define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpbroadcastq %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpbroadcastq %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastq %xmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
+ %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
+ %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res2, %res3
+ ret <8 x i64> %res4
}
-declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly
+declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)
define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
; CHECK: vpbroadcastq
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index bb8a1f51cb0..733cb01e7b0 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -4276,3 +4276,124 @@ define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8>
ret <16 x i16> %res4
}
+declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32)
+
+define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpbroadcastb %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
+; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1)
+ %res1 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask)
+ %res2 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask)
+ %res3 = add <32 x i8> %res, %res1
+ %res4 = add <32 x i8> %res2, %res3
+ ret <32 x i8> %res4
+}
+
+declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpbroadcastb %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask)
+ %res2 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask)
+ %res3 = add <16 x i8> %res, %res1
+ %res4 = add <16 x i8> %res2, %res3
+ ret <16 x i8> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpbroadcastw %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask)
+ %res2 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res2, %res3
+ ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpbroadcastw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask)
+ %res2 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> zeroinitializer, i8 %mask)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res2, %res3
+ ret <8 x i16> %res4
+}
+
+declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64)
+
+define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovq %rdi, %k1 ## encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; CHECK-NEXT: vpbroadcastb %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
+; CHECK-NEXT: vpbroadcastb %xmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xd0]
+; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x78,0xc0]
+; CHECK-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
+; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1)
+ %res1 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask)
+ %res2 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask)
+ %res3 = add <64 x i8> %res, %res1
+ %res4 = add <64 x i8> %res2, %res3
+ ret <64 x i8> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
+; CHECK-NEXT: vpbroadcastw %xmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xd0]
+; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x79,0xc0]
+; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
+; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1)
+ %res1 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask)
+ %res2 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res2, %res3
+ ret <32 x i16> %res4
+}
+
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index a600057c909..96c860cf5d2 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -5184,3 +5184,88 @@ define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i6
%res2 = add <4 x i64> %res, %res1
ret <4 x i64> %res2
}
+
+declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpbroadcastd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 -1)
+ %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
+ %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res2, %res3
+ ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpbroadcastd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
+ %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
+ %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res2, %res3
+ ret <4 x i32> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpbroadcastq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 -1)
+ %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 %mask)
+ %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer,i8 %mask)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res2, %res3
+ ret <4 x i64> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
+; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpbroadcastq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 -1)
+ %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 %mask)
+ %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer,i8 %mask)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res2, %res3
+ ret <2 x i64> %res4
+}
+
diff --git a/llvm/test/MC/X86/avx512-encodings.s b/llvm/test/MC/X86/avx512-encodings.s
index 2a9bf16d378..d39b16ffa32 100644
--- a/llvm/test/MC/X86/avx512-encodings.s
+++ b/llvm/test/MC/X86/avx512-encodings.s
@@ -17733,3 +17733,127 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0xba,0xf8,0xfb,0xff,0xff,0x7b]
vpternlogq $0x7b, -1032(%rdx){1to8}, %zmm2, %zmm15
+// CHECK: vpbroadcastd (%rcx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x11]
+ vpbroadcastd (%rcx), %zmm26
+
+// CHECK: vpbroadcastd (%rcx), %zmm26 {%k2}
+// CHECK: encoding: [0x62,0x62,0x7d,0x4a,0x58,0x11]
+ vpbroadcastd (%rcx), %zmm26 {%k2}
+
+// CHECK: vpbroadcastd (%rcx), %zmm26 {%k2} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xca,0x58,0x11]
+ vpbroadcastd (%rcx), %zmm26 {%k2} {z}
+
+// CHECK: vpbroadcastd 291(%rax,%r14,8), %zmm26
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x58,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastd 291(%rax,%r14,8), %zmm26
+
+// CHECK: vpbroadcastd 508(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x52,0x7f]
+ vpbroadcastd 508(%rdx), %zmm26
+
+// CHECK: vpbroadcastd 512(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x92,0x00,0x02,0x00,0x00]
+ vpbroadcastd 512(%rdx), %zmm26
+
+// CHECK: vpbroadcastd -512(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x52,0x80]
+ vpbroadcastd -512(%rdx), %zmm26
+
+// CHECK: vpbroadcastd -516(%rdx), %zmm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x92,0xfc,0xfd,0xff,0xff]
+ vpbroadcastd -516(%rdx), %zmm26
+
+// CHECK: vpbroadcastd %xmm22, %zmm10
+// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x58,0xd6]
+ vpbroadcastd %xmm22, %zmm10
+
+// CHECK: vpbroadcastd %xmm22, %zmm10 {%k7}
+// CHECK: encoding: [0x62,0x32,0x7d,0x4f,0x58,0xd6]
+ vpbroadcastd %xmm22, %zmm10 {%k7}
+
+// CHECK: vpbroadcastd %xmm22, %zmm10 {%k7} {z}
+// CHECK: encoding: [0x62,0x32,0x7d,0xcf,0x58,0xd6]
+ vpbroadcastd %xmm22, %zmm10 {%k7} {z}
+
+// CHECK: vpbroadcastd %eax, %zmm11
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x7c,0xd8]
+ vpbroadcastd %eax, %zmm11
+
+// CHECK: vpbroadcastd %eax, %zmm11 {%k6}
+// CHECK: encoding: [0x62,0x72,0x7d,0x4e,0x7c,0xd8]
+ vpbroadcastd %eax, %zmm11 {%k6}
+
+// CHECK: vpbroadcastd %eax, %zmm11 {%k6} {z}
+// CHECK: encoding: [0x62,0x72,0x7d,0xce,0x7c,0xd8]
+ vpbroadcastd %eax, %zmm11 {%k6} {z}
+
+// CHECK: vpbroadcastd %ebp, %zmm11
+// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x7c,0xdd]
+ vpbroadcastd %ebp, %zmm11
+
+// CHECK: vpbroadcastd %r13d, %zmm11
+// CHECK: encoding: [0x62,0x52,0x7d,0x48,0x7c,0xdd]
+ vpbroadcastd %r13d, %zmm11
+
+// CHECK: vpbroadcastq (%rcx), %zmm25
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x09]
+ vpbroadcastq (%rcx), %zmm25
+
+// CHECK: vpbroadcastq (%rcx), %zmm25 {%k2}
+// CHECK: encoding: [0x62,0x62,0xfd,0x4a,0x59,0x09]
+ vpbroadcastq (%rcx), %zmm25 {%k2}
+
+// CHECK: vpbroadcastq (%rcx), %zmm25 {%k2} {z}
+// CHECK: encoding: [0x62,0x62,0xfd,0xca,0x59,0x09]
+ vpbroadcastq (%rcx), %zmm25 {%k2} {z}
+
+// CHECK: vpbroadcastq 291(%rax,%r14,8), %zmm25
+// CHECK: encoding: [0x62,0x22,0xfd,0x48,0x59,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastq 291(%rax,%r14,8), %zmm25
+
+// CHECK: vpbroadcastq 1016(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x4a,0x7f]
+ vpbroadcastq 1016(%rdx), %zmm25
+
+// CHECK: vpbroadcastq 1024(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x8a,0x00,0x04,0x00,0x00]
+ vpbroadcastq 1024(%rdx), %zmm25
+
+// CHECK: vpbroadcastq -1024(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x4a,0x80]
+ vpbroadcastq -1024(%rdx), %zmm25
+
+// CHECK: vpbroadcastq -1032(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x8a,0xf8,0xfb,0xff,0xff]
+ vpbroadcastq -1032(%rdx), %zmm25
+
+// CHECK: vpbroadcastq %xmm5, %zmm3
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x59,0xdd]
+ vpbroadcastq %xmm5, %zmm3
+
+// CHECK: vpbroadcastq %xmm5, %zmm3 {%k5}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4d,0x59,0xdd]
+ vpbroadcastq %xmm5, %zmm3 {%k5}
+
+// CHECK: vpbroadcastq %xmm5, %zmm3 {%k5} {z}
+// CHECK: encoding: [0x62,0xf2,0xfd,0xcd,0x59,0xdd]
+ vpbroadcastq %xmm5, %zmm3 {%k5} {z}
+
+// CHECK: vpbroadcastq %rax, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xc8]
+ vpbroadcastq %rax, %zmm1
+
+// CHECK: vpbroadcastq %rax, %zmm1 {%k6}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4e,0x7c,0xc8]
+ vpbroadcastq %rax, %zmm1 {%k6}
+
+// CHECK: vpbroadcastq %rax, %zmm1 {%k6} {z}
+// CHECK: encoding: [0x62,0xf2,0xfd,0xce,0x7c,0xc8]
+ vpbroadcastq %rax, %zmm1 {%k6} {z}
+
+// CHECK: vpbroadcastq %r8, %zmm1
+// CHECK: encoding: [0x62,0xd2,0xfd,0x48,0x7c,0xc8]
+ vpbroadcastq %r8, %zmm1
+
diff --git a/llvm/test/MC/X86/x86-64-avx512bw.s b/llvm/test/MC/X86/x86-64-avx512bw.s
index d968986f815..d6be0057354 100644
--- a/llvm/test/MC/X86/x86-64-avx512bw.s
+++ b/llvm/test/MC/X86/x86-64-avx512bw.s
@@ -4595,3 +4595,99 @@
// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x70,0x92,0xc0,0xdf,0xff,0xff,0x7b]
vpshufhw $123, -8256(%rdx), %zmm18
+// CHECK: vpbroadcastb %xmm23, %zmm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x78,0xcf]
+ vpbroadcastb %xmm23, %zmm25
+
+// CHECK: vpbroadcastb %xmm23, %zmm25 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7d,0x4f,0x78,0xcf]
+ vpbroadcastb %xmm23, %zmm25 {%k7}
+
+// CHECK: vpbroadcastb %xmm23, %zmm25 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xcf,0x78,0xcf]
+ vpbroadcastb %xmm23, %zmm25 {%k7} {z}
+
+// CHECK: vpbroadcastb (%rcx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x09]
+ vpbroadcastb (%rcx), %zmm25
+
+// CHECK: vpbroadcastb 291(%rax,%r14,8), %zmm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x78,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastb 291(%rax,%r14,8), %zmm25
+
+// CHECK: vpbroadcastb 127(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x4a,0x7f]
+ vpbroadcastb 127(%rdx), %zmm25
+
+// CHECK: vpbroadcastb 128(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x8a,0x80,0x00,0x00,0x00]
+ vpbroadcastb 128(%rdx), %zmm25
+
+// CHECK: vpbroadcastb -128(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x4a,0x80]
+ vpbroadcastb -128(%rdx), %zmm25
+
+// CHECK: vpbroadcastb -129(%rdx), %zmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x8a,0x7f,0xff,0xff,0xff]
+ vpbroadcastb -129(%rdx), %zmm25
+
+// CHECK: vpbroadcastb %eax, %zmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x7a,0xd8]
+ vpbroadcastb %eax, %zmm19
+
+// CHECK: vpbroadcastb %eax, %zmm19 {%k7}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x4f,0x7a,0xd8]
+ vpbroadcastb %eax, %zmm19 {%k7}
+
+// CHECK: vpbroadcastb %eax, %zmm19 {%k7} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0xcf,0x7a,0xd8]
+ vpbroadcastb %eax, %zmm19 {%k7} {z}
+
+// CHECK: vpbroadcastw %xmm19, %zmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x79,0xf3]
+ vpbroadcastw %xmm19, %zmm30
+
+// CHECK: vpbroadcastw %xmm19, %zmm30 {%k4}
+// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0x79,0xf3]
+ vpbroadcastw %xmm19, %zmm30 {%k4}
+
+// CHECK: vpbroadcastw %xmm19, %zmm30 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0x79,0xf3]
+ vpbroadcastw %xmm19, %zmm30 {%k4} {z}
+
+// CHECK: vpbroadcastw (%rcx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0x31]
+ vpbroadcastw (%rcx), %zmm30
+
+// CHECK: vpbroadcastw 291(%rax,%r14,8), %zmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x79,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastw 291(%rax,%r14,8), %zmm30
+
+// CHECK: vpbroadcastw 254(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0x72,0x7f]
+ vpbroadcastw 254(%rdx), %zmm30
+
+// CHECK: vpbroadcastw 256(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0xb2,0x00,0x01,0x00,0x00]
+ vpbroadcastw 256(%rdx), %zmm30
+
+// CHECK: vpbroadcastw -256(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0x72,0x80]
+ vpbroadcastw -256(%rdx), %zmm30
+
+// CHECK: vpbroadcastw -258(%rdx), %zmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0xb2,0xfe,0xfe,0xff,0xff]
+ vpbroadcastw -258(%rdx), %zmm30
+
+// CHECK: vpbroadcastw %eax, %zmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x7b,0xc0]
+ vpbroadcastw %eax, %zmm24
+
+// CHECK: vpbroadcastw %eax, %zmm24 {%k1}
+// CHECK: encoding: [0x62,0x62,0x7d,0x49,0x7b,0xc0]
+ vpbroadcastw %eax, %zmm24 {%k1}
+
+// CHECK: vpbroadcastw %eax, %zmm24 {%k1} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xc9,0x7b,0xc0]
+ vpbroadcastw %eax, %zmm24 {%k1} {z}
+
diff --git a/llvm/test/MC/X86/x86-64-avx512bw_vl.s b/llvm/test/MC/X86/x86-64-avx512bw_vl.s
index cd57ce1d55a..75802059be7 100644
--- a/llvm/test/MC/X86/x86-64-avx512bw_vl.s
+++ b/llvm/test/MC/X86/x86-64-avx512bw_vl.s
@@ -9246,3 +9246,388 @@
// CHECK: vpsadbw -4128(%rdx), %ymm26, %ymm20
// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xf6,0xa2,0xe0,0xef,0xff,0xff]
vpsadbw -4128(%rdx), %ymm26, %ymm20
+
+// CHECK: vpbroadcastb %xmm28, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x78,0xf4]
+ vpbroadcastb %xmm28, %xmm30
+
+// CHECK: vpbroadcastb %xmm28, %xmm30 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7d,0x0c,0x78,0xf4]
+ vpbroadcastb %xmm28, %xmm30 {%k4}
+
+// CHECK: vpbroadcastb %xmm28, %xmm30 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0x8c,0x78,0xf4]
+ vpbroadcastb %xmm28, %xmm30 {%k4} {z}
+
+// CHECK: vpbroadcastb (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0x31]
+ vpbroadcastb (%rcx), %xmm30
+
+// CHECK: vpbroadcastb 4660(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x78,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vpbroadcastb 4660(%rax,%r14,8), %xmm30
+
+// CHECK: vpbroadcastb 127(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0x72,0x7f]
+ vpbroadcastb 127(%rdx), %xmm30
+
+// CHECK: vpbroadcastb 128(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0xb2,0x80,0x00,0x00,0x00]
+ vpbroadcastb 128(%rdx), %xmm30
+
+// CHECK: vpbroadcastb -128(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0x72,0x80]
+ vpbroadcastb -128(%rdx), %xmm30
+
+// CHECK: vpbroadcastb -129(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0xb2,0x7f,0xff,0xff,0xff]
+ vpbroadcastb -129(%rdx), %xmm30
+
+// CHECK: vpbroadcastb %xmm25, %ymm17
+// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x78,0xc9]
+ vpbroadcastb %xmm25, %ymm17
+
+// CHECK: vpbroadcastb %xmm25, %ymm17 {%k2}
+// CHECK: encoding: [0x62,0x82,0x7d,0x2a,0x78,0xc9]
+ vpbroadcastb %xmm25, %ymm17 {%k2}
+
+// CHECK: vpbroadcastb %xmm25, %ymm17 {%k2} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0xaa,0x78,0xc9]
+ vpbroadcastb %xmm25, %ymm17 {%k2} {z}
+
+// CHECK: vpbroadcastb (%rcx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x09]
+ vpbroadcastb (%rcx), %ymm17
+
+// CHECK: vpbroadcastb 4660(%rax,%r14,8), %ymm17
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x78,0x8c,0xf0,0x34,0x12,0x00,0x00]
+ vpbroadcastb 4660(%rax,%r14,8), %ymm17
+
+// CHECK: vpbroadcastb 127(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x4a,0x7f]
+ vpbroadcastb 127(%rdx), %ymm17
+
+// CHECK: vpbroadcastb 128(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x8a,0x80,0x00,0x00,0x00]
+ vpbroadcastb 128(%rdx), %ymm17
+
+// CHECK: vpbroadcastb -128(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x4a,0x80]
+ vpbroadcastb -128(%rdx), %ymm17
+
+// CHECK: vpbroadcastb -129(%rdx), %ymm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x8a,0x7f,0xff,0xff,0xff]
+ vpbroadcastb -129(%rdx), %ymm17
+
+// CHECK: vpbroadcastb %eax, %xmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x7a,0xe0]
+ vpbroadcastb %eax, %xmm20
+
+// CHECK: vpbroadcastb %eax, %xmm20 {%k3}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0b,0x7a,0xe0]
+ vpbroadcastb %eax, %xmm20 {%k3}
+
+// CHECK: vpbroadcastb %eax, %xmm20 {%k3} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x8b,0x7a,0xe0]
+ vpbroadcastb %eax, %xmm20 {%k3} {z}
+
+// CHECK: vpbroadcastb %eax, %ymm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x7a,0xd8]
+ vpbroadcastb %eax, %ymm27
+
+// CHECK: vpbroadcastb %eax, %ymm27 {%k6}
+// CHECK: encoding: [0x62,0x62,0x7d,0x2e,0x7a,0xd8]
+ vpbroadcastb %eax, %ymm27 {%k6}
+
+// CHECK: vpbroadcastb %eax, %ymm27 {%k6} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xae,0x7a,0xd8]
+ vpbroadcastb %eax, %ymm27 {%k6} {z}
+
+// CHECK: vpbroadcastw %xmm24, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x79,0xf0]
+ vpbroadcastw %xmm24, %xmm30
+
+// CHECK: vpbroadcastw %xmm24, %xmm30 {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x09,0x79,0xf0]
+ vpbroadcastw %xmm24, %xmm30 {%k1}
+
+// CHECK: vpbroadcastw %xmm24, %xmm30 {%k1} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0x89,0x79,0xf0]
+ vpbroadcastw %xmm24, %xmm30 {%k1} {z}
+
+// CHECK: vpbroadcastw (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0x31]
+ vpbroadcastw (%rcx), %xmm30
+
+// CHECK: vpbroadcastw 4660(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x79,0xb4,0xf0,0x34,0x12,0x00,0x00]
+ vpbroadcastw 4660(%rax,%r14,8), %xmm30
+
+// CHECK: vpbroadcastw 254(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0x72,0x7f]
+ vpbroadcastw 254(%rdx), %xmm30
+
+// CHECK: vpbroadcastw 256(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0xb2,0x00,0x01,0x00,0x00]
+ vpbroadcastw 256(%rdx), %xmm30
+
+// CHECK: vpbroadcastw -256(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0x72,0x80]
+ vpbroadcastw -256(%rdx), %xmm30
+
+// CHECK: vpbroadcastw -258(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0xb2,0xfe,0xfe,0xff,0xff]
+ vpbroadcastw -258(%rdx), %xmm30
+
+// CHECK: vpbroadcastw %xmm18, %ymm28
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x79,0xe2]
+ vpbroadcastw %xmm18, %ymm28
+
+// CHECK: vpbroadcastw %xmm18, %ymm28 {%k3}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2b,0x79,0xe2]
+ vpbroadcastw %xmm18, %ymm28 {%k3}
+
+// CHECK: vpbroadcastw %xmm18, %ymm28 {%k3} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xab,0x79,0xe2]
+ vpbroadcastw %xmm18, %ymm28 {%k3} {z}
+
+// CHECK: vpbroadcastw (%rcx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x21]
+ vpbroadcastw (%rcx), %ymm28
+
+// CHECK: vpbroadcastw 4660(%rax,%r14,8), %ymm28
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x79,0xa4,0xf0,0x34,0x12,0x00,0x00]
+ vpbroadcastw 4660(%rax,%r14,8), %ymm28
+
+// CHECK: vpbroadcastw 254(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x62,0x7f]
+ vpbroadcastw 254(%rdx), %ymm28
+
+// CHECK: vpbroadcastw 256(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0xa2,0x00,0x01,0x00,0x00]
+ vpbroadcastw 256(%rdx), %ymm28
+
+// CHECK: vpbroadcastw -256(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x62,0x80]
+ vpbroadcastw -256(%rdx), %ymm28
+
+// CHECK: vpbroadcastw -258(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0xa2,0xfe,0xfe,0xff,0xff]
+ vpbroadcastw -258(%rdx), %ymm28
+
+// CHECK: vpbroadcastw %eax, %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x7b,0xc0]
+ vpbroadcastw %eax, %xmm24
+
+// CHECK: vpbroadcastw %eax, %xmm24 {%k6}
+// CHECK: encoding: [0x62,0x62,0x7d,0x0e,0x7b,0xc0]
+ vpbroadcastw %eax, %xmm24 {%k6}
+
+// CHECK: vpbroadcastw %eax, %xmm24 {%k6} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0x8e,0x7b,0xc0]
+ vpbroadcastw %eax, %xmm24 {%k6} {z}
+
+// CHECK: vpbroadcastw %eax, %ymm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x7b,0xd8]
+ vpbroadcastw %eax, %ymm19
+
+// CHECK: vpbroadcastw %eax, %ymm19 {%k3}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2b,0x7b,0xd8]
+ vpbroadcastw %eax, %ymm19 {%k3}
+
+// CHECK: vpbroadcastw %eax, %ymm19 {%k3} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0xab,0x7b,0xd8]
+ vpbroadcastw %eax, %ymm19 {%k3} {z}
+
+// CHECK: vpbroadcastb %xmm20, %xmm20
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x78,0xe4]
+ vpbroadcastb %xmm20, %xmm20
+
+// CHECK: vpbroadcastb %xmm20, %xmm20 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0f,0x78,0xe4]
+ vpbroadcastb %xmm20, %xmm20 {%k7}
+
+// CHECK: vpbroadcastb %xmm20, %xmm20 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x8f,0x78,0xe4]
+ vpbroadcastb %xmm20, %xmm20 {%k7} {z}
+
+// CHECK: vpbroadcastb (%rcx), %xmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0x21]
+ vpbroadcastb (%rcx), %xmm20
+
+// CHECK: vpbroadcastb 291(%rax,%r14,8), %xmm20
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x78,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastb 291(%rax,%r14,8), %xmm20
+
+// CHECK: vpbroadcastb 127(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0x62,0x7f]
+ vpbroadcastb 127(%rdx), %xmm20
+
+// CHECK: vpbroadcastb 128(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0xa2,0x80,0x00,0x00,0x00]
+ vpbroadcastb 128(%rdx), %xmm20
+
+// CHECK: vpbroadcastb -128(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0x62,0x80]
+ vpbroadcastb -128(%rdx), %xmm20
+
+// CHECK: vpbroadcastb -129(%rdx), %xmm20
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0xa2,0x7f,0xff,0xff,0xff]
+ vpbroadcastb -129(%rdx), %xmm20
+
+// CHECK: vpbroadcastb %xmm27, %ymm30
+// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x78,0xf3]
+ vpbroadcastb %xmm27, %ymm30
+
+// CHECK: vpbroadcastb %xmm27, %ymm30 {%k6}
+// CHECK: encoding: [0x62,0x02,0x7d,0x2e,0x78,0xf3]
+ vpbroadcastb %xmm27, %ymm30 {%k6}
+
+// CHECK: vpbroadcastb %xmm27, %ymm30 {%k6} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0xae,0x78,0xf3]
+ vpbroadcastb %xmm27, %ymm30 {%k6} {z}
+
+// CHECK: vpbroadcastb (%rcx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0x31]
+ vpbroadcastb (%rcx), %ymm30
+
+// CHECK: vpbroadcastb 291(%rax,%r14,8), %ymm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x78,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastb 291(%rax,%r14,8), %ymm30
+
+// CHECK: vpbroadcastb 127(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0x72,0x7f]
+ vpbroadcastb 127(%rdx), %ymm30
+
+// CHECK: vpbroadcastb 128(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0xb2,0x80,0x00,0x00,0x00]
+ vpbroadcastb 128(%rdx), %ymm30
+
+// CHECK: vpbroadcastb -128(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0x72,0x80]
+ vpbroadcastb -128(%rdx), %ymm30
+
+// CHECK: vpbroadcastb -129(%rdx), %ymm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0xb2,0x7f,0xff,0xff,0xff]
+ vpbroadcastb -129(%rdx), %ymm30
+
+// CHECK: vpbroadcastb %eax, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x7a,0xf0]
+ vpbroadcastb %eax, %xmm22
+
+// CHECK: vpbroadcastb %eax, %xmm22 {%k3}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0b,0x7a,0xf0]
+ vpbroadcastb %eax, %xmm22 {%k3}
+
+// CHECK: vpbroadcastb %eax, %xmm22 {%k3} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x8b,0x7a,0xf0]
+ vpbroadcastb %eax, %xmm22 {%k3} {z}
+
+// CHECK: vpbroadcastb %eax, %ymm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x7a,0xc8]
+ vpbroadcastb %eax, %ymm17
+
+// CHECK: vpbroadcastb %eax, %ymm17 {%k1}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x29,0x7a,0xc8]
+ vpbroadcastb %eax, %ymm17 {%k1}
+
+// CHECK: vpbroadcastb %eax, %ymm17 {%k1} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0xa9,0x7a,0xc8]
+ vpbroadcastb %eax, %ymm17 {%k1} {z}
+
+// CHECK: vpbroadcastw %xmm20, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x79,0xdc]
+ vpbroadcastw %xmm20, %xmm19
+
+// CHECK: vpbroadcastw %xmm20, %xmm19 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x79,0xdc]
+ vpbroadcastw %xmm20, %xmm19 {%k2}
+
+// CHECK: vpbroadcastw %xmm20, %xmm19 {%k2} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x8a,0x79,0xdc]
+ vpbroadcastw %xmm20, %xmm19 {%k2} {z}
+
+// CHECK: vpbroadcastw (%rcx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x19]
+ vpbroadcastw (%rcx), %xmm19
+
+// CHECK: vpbroadcastw 291(%rax,%r14,8), %xmm19
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x79,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastw 291(%rax,%r14,8), %xmm19
+
+// CHECK: vpbroadcastw 254(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x5a,0x7f]
+ vpbroadcastw 254(%rdx), %xmm19
+
+// CHECK: vpbroadcastw 256(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x9a,0x00,0x01,0x00,0x00]
+ vpbroadcastw 256(%rdx), %xmm19
+
+// CHECK: vpbroadcastw -256(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x5a,0x80]
+ vpbroadcastw -256(%rdx), %xmm19
+
+// CHECK: vpbroadcastw -258(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x9a,0xfe,0xfe,0xff,0xff]
+ vpbroadcastw -258(%rdx), %xmm19
+
+// CHECK: vpbroadcastw %xmm17, %ymm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x79,0xc9]
+ vpbroadcastw %xmm17, %ymm25
+
+// CHECK: vpbroadcastw %xmm17, %ymm25 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x79,0xc9]
+ vpbroadcastw %xmm17, %ymm25 {%k7}
+
+// CHECK: vpbroadcastw %xmm17, %ymm25 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x79,0xc9]
+ vpbroadcastw %xmm17, %ymm25 {%k7} {z}
+
+// CHECK: vpbroadcastw (%rcx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x09]
+ vpbroadcastw (%rcx), %ymm25
+
+// CHECK: vpbroadcastw 291(%rax,%r14,8), %ymm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x79,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastw 291(%rax,%r14,8), %ymm25
+
+// CHECK: vpbroadcastw 254(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x4a,0x7f]
+ vpbroadcastw 254(%rdx), %ymm25
+
+// CHECK: vpbroadcastw 256(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x8a,0x00,0x01,0x00,0x00]
+ vpbroadcastw 256(%rdx), %ymm25
+
+// CHECK: vpbroadcastw -256(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x4a,0x80]
+ vpbroadcastw -256(%rdx), %ymm25
+
+// CHECK: vpbroadcastw -258(%rdx), %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x8a,0xfe,0xfe,0xff,0xff]
+ vpbroadcastw -258(%rdx), %ymm25
+
+// CHECK: vpbroadcastw %eax, %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x7b,0xe8]
+ vpbroadcastw %eax, %xmm29
+
+// CHECK: vpbroadcastw %eax, %xmm29 {%k1}
+// CHECK: encoding: [0x62,0x62,0x7d,0x09,0x7b,0xe8]
+ vpbroadcastw %eax, %xmm29 {%k1}
+
+// CHECK: vpbroadcastw %eax, %xmm29 {%k1} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0x89,0x7b,0xe8]
+ vpbroadcastw %eax, %xmm29 {%k1} {z}
+
+// CHECK: vpbroadcastw %eax, %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x7b,0xe0]
+ vpbroadcastw %eax, %ymm28
+
+// CHECK: vpbroadcastw %eax, %ymm28 {%k4}
+// CHECK: encoding: [0x62,0x62,0x7d,0x2c,0x7b,0xe0]
+ vpbroadcastw %eax, %ymm28 {%k4}
+
+// CHECK: vpbroadcastw %eax, %ymm28 {%k4} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xac,0x7b,0xe0]
+ vpbroadcastw %eax, %ymm28 {%k4} {z}
+
diff --git a/llvm/test/MC/X86/x86-64-avx512f_vl.s b/llvm/test/MC/X86/x86-64-avx512f_vl.s
index b6bc85bcb5d..972b8a4007a 100644
--- a/llvm/test/MC/X86/x86-64-avx512f_vl.s
+++ b/llvm/test/MC/X86/x86-64-avx512f_vl.s
@@ -21563,3 +21563,251 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x92,0xf8,0xfb,0xff,0xff,0x7b]
vpternlogq $0x7b, -1032(%rdx){1to4}, %ymm23, %ymm26
+// CHECK: vpbroadcastd (%rcx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0x29]
+ vpbroadcastd (%rcx), %xmm29
+
+// CHECK: vpbroadcastd (%rcx), %xmm29 {%k1}
+// CHECK: encoding: [0x62,0x62,0x7d,0x09,0x58,0x29]
+ vpbroadcastd (%rcx), %xmm29 {%k1}
+
+// CHECK: vpbroadcastd (%rcx), %xmm29 {%k1} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0x89,0x58,0x29]
+ vpbroadcastd (%rcx), %xmm29 {%k1} {z}
+
+// CHECK: vpbroadcastd 291(%rax,%r14,8), %xmm29
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x58,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastd 291(%rax,%r14,8), %xmm29
+
+// CHECK: vpbroadcastd 508(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0x6a,0x7f]
+ vpbroadcastd 508(%rdx), %xmm29
+
+// CHECK: vpbroadcastd 512(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0xaa,0x00,0x02,0x00,0x00]
+ vpbroadcastd 512(%rdx), %xmm29
+
+// CHECK: vpbroadcastd -512(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0x6a,0x80]
+ vpbroadcastd -512(%rdx), %xmm29
+
+// CHECK: vpbroadcastd -516(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0xaa,0xfc,0xfd,0xff,0xff]
+ vpbroadcastd -516(%rdx), %xmm29
+
+// CHECK: vpbroadcastd (%rcx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0x21]
+ vpbroadcastd (%rcx), %ymm28
+
+// CHECK: vpbroadcastd (%rcx), %ymm28 {%k2}
+// CHECK: encoding: [0x62,0x62,0x7d,0x2a,0x58,0x21]
+ vpbroadcastd (%rcx), %ymm28 {%k2}
+
+// CHECK: vpbroadcastd (%rcx), %ymm28 {%k2} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xaa,0x58,0x21]
+ vpbroadcastd (%rcx), %ymm28 {%k2} {z}
+
+// CHECK: vpbroadcastd 291(%rax,%r14,8), %ymm28
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x58,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastd 291(%rax,%r14,8), %ymm28
+
+// CHECK: vpbroadcastd 508(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0x62,0x7f]
+ vpbroadcastd 508(%rdx), %ymm28
+
+// CHECK: vpbroadcastd 512(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0xa2,0x00,0x02,0x00,0x00]
+ vpbroadcastd 512(%rdx), %ymm28
+
+// CHECK: vpbroadcastd -512(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0x62,0x80]
+ vpbroadcastd -512(%rdx), %ymm28
+
+// CHECK: vpbroadcastd -516(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0xa2,0xfc,0xfd,0xff,0xff]
+ vpbroadcastd -516(%rdx), %ymm28
+
+// CHECK: vpbroadcastd %xmm18, %xmm29
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x58,0xea]
+ vpbroadcastd %xmm18, %xmm29
+
+// CHECK: vpbroadcastd %xmm18, %xmm29 {%k2}
+// CHECK: encoding: [0x62,0x22,0x7d,0x0a,0x58,0xea]
+ vpbroadcastd %xmm18, %xmm29 {%k2}
+
+// CHECK: vpbroadcastd %xmm18, %xmm29 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0x8a,0x58,0xea]
+ vpbroadcastd %xmm18, %xmm29 {%k2} {z}
+
+// CHECK: vpbroadcastd %xmm26, %ymm17
+// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x58,0xca]
+ vpbroadcastd %xmm26, %ymm17
+
+// CHECK: vpbroadcastd %xmm26, %ymm17 {%k3}
+// CHECK: encoding: [0x62,0x82,0x7d,0x2b,0x58,0xca]
+ vpbroadcastd %xmm26, %ymm17 {%k3}
+
+// CHECK: vpbroadcastd %xmm26, %ymm17 {%k3} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0xab,0x58,0xca]
+ vpbroadcastd %xmm26, %ymm17 {%k3} {z}
+
+// CHECK: vpbroadcastd %eax, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x7c,0xf0]
+ vpbroadcastd %eax, %xmm22
+
+// CHECK: vpbroadcastd %eax, %xmm22 {%k5}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0d,0x7c,0xf0]
+ vpbroadcastd %eax, %xmm22 {%k5}
+
+// CHECK: vpbroadcastd %eax, %xmm22 {%k5} {z}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x8d,0x7c,0xf0]
+ vpbroadcastd %eax, %xmm22 {%k5} {z}
+
+// CHECK: vpbroadcastd %ebp, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x7c,0xf5]
+ vpbroadcastd %ebp, %xmm22
+
+// CHECK: vpbroadcastd %r13d, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x7d,0x08,0x7c,0xf5]
+ vpbroadcastd %r13d, %xmm22
+
+// CHECK: vpbroadcastd %eax, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x7c,0xc8]
+ vpbroadcastd %eax, %ymm25
+
+// CHECK: vpbroadcastd %eax, %ymm25 {%k5}
+// CHECK: encoding: [0x62,0x62,0x7d,0x2d,0x7c,0xc8]
+ vpbroadcastd %eax, %ymm25 {%k5}
+
+// CHECK: vpbroadcastd %eax, %ymm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x62,0x7d,0xad,0x7c,0xc8]
+ vpbroadcastd %eax, %ymm25 {%k5} {z}
+
+// CHECK: vpbroadcastd %ebp, %ymm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x7c,0xcd]
+ vpbroadcastd %ebp, %ymm25
+
+// CHECK: vpbroadcastd %r13d, %ymm25
+// CHECK: encoding: [0x62,0x42,0x7d,0x28,0x7c,0xcd]
+ vpbroadcastd %r13d, %ymm25
+
+// CHECK: vpbroadcastq (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0x31]
+ vpbroadcastq (%rcx), %xmm30
+
+// CHECK: vpbroadcastq (%rcx), %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x62,0xfd,0x0f,0x59,0x31]
+ vpbroadcastq (%rcx), %xmm30 {%k7}
+
+// CHECK: vpbroadcastq (%rcx), %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x62,0xfd,0x8f,0x59,0x31]
+ vpbroadcastq (%rcx), %xmm30 {%k7} {z}
+
+// CHECK: vpbroadcastq 291(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x22,0xfd,0x08,0x59,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastq 291(%rax,%r14,8), %xmm30
+
+// CHECK: vpbroadcastq 1016(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0x72,0x7f]
+ vpbroadcastq 1016(%rdx), %xmm30
+
+// CHECK: vpbroadcastq 1024(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0xb2,0x00,0x04,0x00,0x00]
+ vpbroadcastq 1024(%rdx), %xmm30
+
+// CHECK: vpbroadcastq -1024(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0x72,0x80]
+ vpbroadcastq -1024(%rdx), %xmm30
+
+// CHECK: vpbroadcastq -1032(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0xb2,0xf8,0xfb,0xff,0xff]
+ vpbroadcastq -1032(%rdx), %xmm30
+
+// CHECK: vpbroadcastq (%rcx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x19]
+ vpbroadcastq (%rcx), %ymm19
+
+// CHECK: vpbroadcastq (%rcx), %ymm19 {%k7}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2f,0x59,0x19]
+ vpbroadcastq (%rcx), %ymm19 {%k7}
+
+// CHECK: vpbroadcastq (%rcx), %ymm19 {%k7} {z}
+// CHECK: encoding: [0x62,0xe2,0xfd,0xaf,0x59,0x19]
+ vpbroadcastq (%rcx), %ymm19 {%k7} {z}
+
+// CHECK: vpbroadcastq 291(%rax,%r14,8), %ymm19
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x59,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpbroadcastq 291(%rax,%r14,8), %ymm19
+
+// CHECK: vpbroadcastq 1016(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x5a,0x7f]
+ vpbroadcastq 1016(%rdx), %ymm19
+
+// CHECK: vpbroadcastq 1024(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x9a,0x00,0x04,0x00,0x00]
+ vpbroadcastq 1024(%rdx), %ymm19
+
+// CHECK: vpbroadcastq -1024(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x5a,0x80]
+ vpbroadcastq -1024(%rdx), %ymm19
+
+// CHECK: vpbroadcastq -1032(%rdx), %ymm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x9a,0xf8,0xfb,0xff,0xff]
+ vpbroadcastq -1032(%rdx), %ymm19
+
+// CHECK: vpbroadcastq %xmm24, %xmm19
+// CHECK: encoding: [0x62,0x82,0xfd,0x08,0x59,0xd8]
+ vpbroadcastq %xmm24, %xmm19
+
+// CHECK: vpbroadcastq %xmm24, %xmm19 {%k6}
+// CHECK: encoding: [0x62,0x82,0xfd,0x0e,0x59,0xd8]
+ vpbroadcastq %xmm24, %xmm19 {%k6}
+
+// CHECK: vpbroadcastq %xmm24, %xmm19 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0xfd,0x8e,0x59,0xd8]
+ vpbroadcastq %xmm24, %xmm19 {%k6} {z}
+
+// CHECK: vpbroadcastq %xmm26, %ymm19
+// CHECK: encoding: [0x62,0x82,0xfd,0x28,0x59,0xda]
+ vpbroadcastq %xmm26, %ymm19
+
+// CHECK: vpbroadcastq %xmm26, %ymm19 {%k6}
+// CHECK: encoding: [0x62,0x82,0xfd,0x2e,0x59,0xda]
+ vpbroadcastq %xmm26, %ymm19 {%k6}
+
+// CHECK: vpbroadcastq %xmm26, %ymm19 {%k6} {z}
+// CHECK: encoding: [0x62,0x82,0xfd,0xae,0x59,0xda]
+ vpbroadcastq %xmm26, %ymm19 {%k6} {z}
+
+// CHECK: vpbroadcastq %rax, %xmm22
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x7c,0xf0]
+ vpbroadcastq %rax, %xmm22
+
+// CHECK: vpbroadcastq %rax, %xmm22 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x7c,0xf0]
+ vpbroadcastq %rax, %xmm22 {%k2}
+
+// CHECK: vpbroadcastq %rax, %xmm22 {%k2} {z}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x8a,0x7c,0xf0]
+ vpbroadcastq %rax, %xmm22 {%k2} {z}
+
+// CHECK: vpbroadcastq %r8, %xmm22
+// CHECK: encoding: [0x62,0xc2,0xfd,0x08,0x7c,0xf0]
+ vpbroadcastq %r8, %xmm22
+
+// CHECK: vpbroadcastq %rax, %ymm19
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x7c,0xd8]
+ vpbroadcastq %rax, %ymm19
+
+// CHECK: vpbroadcastq %rax, %ymm19 {%k5}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2d,0x7c,0xd8]
+ vpbroadcastq %rax, %ymm19 {%k5}
+
+// CHECK: vpbroadcastq %rax, %ymm19 {%k5} {z}
+// CHECK: encoding: [0x62,0xe2,0xfd,0xad,0x7c,0xd8]
+ vpbroadcastq %rax, %ymm19 {%k5} {z}
+
+// CHECK: vpbroadcastq %r8, %ymm19
+// CHECK: encoding: [0x62,0xc2,0xfd,0x28,0x7c,0xd8]
+ vpbroadcastq %r8, %ymm19
+
OpenPOWER on IntegriCloud