Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Support/Host.cpp                    |   1
-rw-r--r--  llvm/lib/Target/X86/X86.td                   |   3
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp      |   4
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h        |   5
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td        | 117
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td |  13
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.td          |   1
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h      | 107
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.cpp         |   1
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.h           |   4
10 files changed, 246 insertions, 10 deletions
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index 9cc21a284a8..f80ec6ba72b 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -1258,6 +1258,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1);
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
+ Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save;
Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave;
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a5ffc72ce0e..02186937cb4 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -152,6 +152,9 @@ def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
"Enable AVX-512 Vector Byte Manipulation Instructions",
[FeatureBWI]>;
+def FeatureVBMI2 : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
+ "Enable AVX-512 further Vector Byte Manipulation Instructions",
+ [FeatureBWI]>;
def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
"Enable AVX-512 Integer Fused Multiple-Add",
[FeatureAVX512]>;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index eeae6724b50..ac4b1d672bb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25090,6 +25090,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PACKUS: return "X86ISD::PACKUS";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::VALIGN: return "X86ISD::VALIGN";
+ case X86ISD::VSHLD: return "X86ISD::VSHLD";
+ case X86ISD::VSHRD: return "X86ISD::VSHRD";
+ case X86ISD::VSHLDV: return "X86ISD::VSHLDV";
+ case X86ISD::VSHRDV: return "X86ISD::VSHRDV";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 426fb347af6..1327cf2c445 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -391,6 +391,11 @@ namespace llvm {
PSHUFHW,
PSHUFLW,
SHUFP,
+ // VBMI2 Concat & Shift.
+ VSHLD,
+ VSHRD,
+ VSHLDV,
+ VSHRDV,
//Shuffle Packed Values at 128-bit granularity.
SHUF128,
MOVDDUP,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 984d1433616..9c6e923b3d3 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -8491,11 +8491,13 @@ multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
}
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo VTInfo> {
+ AVX512VLVectorVTInfo VTInfo,
+ Predicate Pred = HasAVX512> {
+ let Predicates = [Pred] in
defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr>,
compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
- let Predicates = [HasVLX] in {
+ let Predicates = [Pred, HasVLX] in {
defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr>,
compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr>,
@@ -8539,11 +8541,13 @@ multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {
}
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo VTInfo> {
+ AVX512VLVectorVTInfo VTInfo,
+ Predicate Pred = HasAVX512> {
+ let Predicates = [Pred] in
defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr>,
expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
- let Predicates = [HasVLX] in {
+ let Predicates = [Pred, HasVLX] in {
defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr>,
expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr>,
@@ -8748,12 +8752,13 @@ multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr,
}
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
- AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{
- let Predicates = [HasBWI] in {
+ AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo,
+ Predicate Pred = HasBWI> {
+ let Predicates = [Pred] in {
defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info512,
SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
}
- let Predicates = [HasBWI, HasVLX] in {
+ let Predicates = [Pred, HasVLX] in {
defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info128,
SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info256,
@@ -8762,11 +8767,12 @@ multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
}
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
- bits<8> opc, SDNode OpNode>{
- let Predicates = [HasAVX512] in {
+ bits<8> opc, SDNode OpNode,
+ Predicate Pred = HasAVX512> {
+ let Predicates = [Pred] in {
defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
}
- let Predicates = [HasAVX512, HasVLX] in {
+ let Predicates = [Pred, HasVLX] in {
defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
}
@@ -10063,3 +10069,94 @@ defm : vpclmulqdq_aliases<"VPCLMULQDQZ", VR512, i512mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
+//===----------------------------------------------------------------------===//
+// VBMI2
+//===----------------------------------------------------------------------===//
+
+multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
+ X86VectorVTInfo VTI> {
+ let Constraints = "$src1 = $dst",
+ ExeDomain = VTI.ExeDomain in {
+ defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
+ (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
+ "$src3, $src2", "$src2, $src3",
+ (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
+ AVX512FMA3Base;
+ defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
+ (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
+ "$src3, $src2", "$src2, $src3",
+ (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
+ (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
+ AVX512FMA3Base;
+ }
+}
+
+multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
+ X86VectorVTInfo VTI>
+ : VBMI2_shift_var_rm<Op, OpStr, OpNode, VTI> {
+ let Constraints = "$src1 = $dst",
+ ExeDomain = VTI.ExeDomain in
+ defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
+ (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
+ "${src3}"##VTI.BroadcastStr##", $src2",
+ "$src2, ${src3}"##VTI.BroadcastStr,
+ (OpNode VTI.RC:$src1, VTI.RC:$src2,
+ (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
+ AVX512FMA3Base, EVEX_B;
+}
+
+multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTI> {
+ let Predicates = [HasVBMI2] in
+ defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, VTI.info512>, EVEX_V512;
+ let Predicates = [HasVBMI2, HasVLX] in {
+ defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, VTI.info256>, EVEX_V256;
+ defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, VTI.info128>, EVEX_V128;
+ }
+}
+
+multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTI> {
+ let Predicates = [HasVBMI2] in
+ defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, VTI.info512>, EVEX_V512;
+ let Predicates = [HasVBMI2, HasVLX] in {
+ defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, VTI.info256>, EVEX_V256;
+ defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, VTI.info128>, EVEX_V128;
+ }
+}
+multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
+ SDNode OpNode> {
+ defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode,
+ avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
+ defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode,
+ avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+ defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode,
+ avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
+ SDNode OpNode> {
+ defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", avx512vl_i16_info,
+ avx512vl_i16_info, HasVBMI2>, VEX_W, EVEX_CD8<16, CD8VF>;
+ defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
+ OpNode, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+ defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
+ HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+}
+
+// Concat & Shift
+defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv>;
+defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv>;
+defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld>;
+defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd>;
+// Compress
+defm VPCOMPRESSB : compress_by_elt_width <0x63, "vpcompressb", avx512vl_i8_info,
+ HasVBMI2>, EVEX;
+defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", avx512vl_i16_info,
+ HasVBMI2>, EVEX, VEX_W;
+// Expand
+defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", avx512vl_i8_info,
+ HasVBMI2>, EVEX;
+defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", avx512vl_i16_info,
+ HasVBMI2>, EVEX, VEX_W;
+
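[Editorial note: the VPSHLD/VPSHRD patterns above implement the VBMI2 "concatenate and shift" operations. Each destination lane is extracted from a double-width concatenation of the two source lanes, shifted by an immediate -- or, for the V forms, by a per-lane count from a third operand, with $src1 tied to $dst as the Constraints show. A scalar model of one 64-bit lane, matching the documented instruction behavior but written as an illustrative sketch rather than code from this patch; the count is reduced modulo the lane width:]

#include <cstdint>

// One "q" lane of VPSHLDQ: the high 64 bits of concat(a, b) << c.
static uint64_t vpshld_lane(uint64_t a, uint64_t b, unsigned imm) {
  unsigned c = imm & 63;              // count is taken mod lane width
  if (c == 0) return a;               // avoid UB in b >> 64
  return (a << c) | (b >> (64 - c));
}

// One "q" lane of VPSHRDQ: the low 64 bits of concat(b, a) >> c.
static uint64_t vpshrd_lane(uint64_t a, uint64_t b, unsigned imm) {
  unsigned c = imm & 63;
  if (c == 0) return a;
  return (a >> c) | (b << (64 - c));
}
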
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 6f25b30dba9..c98aa3b9123 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -350,6 +350,19 @@ def X86PAlignr : SDNode<"X86ISD::PALIGNR",
SDTCisVT<3, i8>]>>;
def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
+def X86VShld : SDNode<"X86ISD::VSHLD", SDTShuff3OpI>;
+def X86VShrd : SDNode<"X86ISD::VSHRD", SDTShuff3OpI>;
+def X86VShldv : SDNode<"X86ISD::VSHLDV",
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisSameAs<0,3>]>>;
+def X86VShrdv : SDNode<"X86ISD::VSHRDV",
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisSameAs<0,3>]>>;
+
def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 91e55f84217..97f11a32f46 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -862,6 +862,7 @@ def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
def NoBMI2 : Predicate<"!Subtarget->hasBMI2()">;
def HasVBMI : Predicate<"Subtarget->hasVBMI()">,
AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">;
+def HasVBMI2 : Predicate<"Subtarget->hasVBMI2()">;
def HasIFMA : Predicate<"Subtarget->hasIFMA()">,
AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">;
def HasRTM : Predicate<"Subtarget->hasRTM()">;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index e3554333828..11f338b878a 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -120,6 +120,12 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_b_128,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_b_256,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_b_512,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_d_128,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_d_256,
@@ -144,6 +150,18 @@ static const IntrinsicData IntrinsicsWithChain[] = {
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_store_q_512,
COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_w_128,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_w_256,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_store_w_512,
+ COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_b_128,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_b_256,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_b_512,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_d_128,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_d_256,
@@ -168,6 +186,12 @@ static const IntrinsicData IntrinsicsWithChain[] = {
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_w_128,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_w_256,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_load_w_512,
+ EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
@@ -479,6 +503,13 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC,
X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
+
+ X86_INTRINSIC_DATA(avx512_mask_compress_b_128, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_b_256, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_b_512, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_d_256, COMPRESS_EXPAND_IN_REG,
@@ -503,6 +534,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_w_128, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_w_256, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_compress_w_512, COMPRESS_EXPAND_IN_REG,
+ X86ISD::COMPRESS, 0),
X86_INTRINSIC_DATA(avx512_mask_conflict_d_128, INTR_TYPE_1OP_MASK,
X86ISD::CONFLICT, 0),
X86_INTRINSIC_DATA(avx512_mask_conflict_d_256, INTR_TYPE_1OP_MASK,
@@ -677,6 +714,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FDIVS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FDIVS_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_b_128, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_b_256, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_b_512, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
@@ -701,6 +744,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_q_512, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_w_128, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_w_256, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_expand_w_512, COMPRESS_EXPAND_IN_REG,
+ X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
@@ -1192,6 +1241,44 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_mask_vpmadd52l_uq_512 , IFMA_OP_MASK,
X86ISD::VPMADD52L, 0),
+
+ X86_INTRINSIC_DATA(avx512_mask_vpshld_d_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshld_d_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshld_d_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshld_q_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshld_q_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshld_q_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshld_w_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshld_w_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshld_w_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrd_d_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrd_d_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrd_d_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrd_q_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrd_q_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrd_q_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrd_w_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrd_w_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrd_w_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, ISD::FMA, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, ISD::FMA, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, ISD::FMA,
@@ -1338,6 +1425,26 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_512, IFMA_OP_MASKZ,
X86ISD::VPMADD52L, 0),
+
+ X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+
X86_INTRINSIC_DATA(avx512_packssdw_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
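
[Editorial note: the compress/expand entries above reuse the existing X86ISD::COMPRESS and X86ISD::EXPAND nodes, extending them to byte and word elements. As a plain scalar model of what masked compression computes -- an illustrative sketch, not LLVM code -- active source elements are packed toward the low end and the remaining lanes take the passthru value (or zero in the maskz forms):]

#include <cstddef>
#include <cstdint>

// Scalar model of masked byte compress (vpcompressb semantics).
static void compress_b(uint8_t *dst, const uint8_t *src,
                       const uint8_t *passthru, uint64_t mask, size_t n) {
  size_t j = 0;
  for (size_t i = 0; i < n; ++i)       // pack selected elements low
    if ((mask >> i) & 1)
      dst[j++] = src[i];
  for (; j < n; ++j)                   // fill the rest from passthru
    dst[j] = passthru[j];
}

[Expansion is the inverse: elements are read sequentially from the source and scattered into the lanes selected by the mask.]
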
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index ff29b1e569c..eb73b123a9f 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -313,6 +313,7 @@ void X86Subtarget::initializeEnvironment() {
HasBMI = false;
HasBMI2 = false;
HasVBMI = false;
+ HasVBMI2 = false;
HasIFMA = false;
HasRTM = false;
HasERI = false;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 482605384af..e1711ece0c6 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -167,6 +167,9 @@ protected:
/// Processor has VBMI instructions.
bool HasVBMI;
+ /// Processor has VBMI2 instructions.
+ bool HasVBMI2;
+
/// Processor has Integer Fused Multiply Add
bool HasIFMA;
@@ -483,6 +486,7 @@ public:
bool hasBMI() const { return HasBMI; }
bool hasBMI2() const { return HasBMI2; }
bool hasVBMI() const { return HasVBMI; }
+ bool hasVBMI2() const { return HasVBMI2; }
bool hasIFMA() const { return HasIFMA; }
bool hasRTM() const { return HasRTM; }
bool hasADX() const { return HasADX; }
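
[Editorial note: with matching front-end support, these instructions become reachable from C via <immintrin.h> under -mavx512vbmi2. A hypothetical usage sketch; the clang-side intrinsic names are assumed here to follow the usual convention (_mm512_shldi_epi64 and friends) and land separately from this backend patch:]

#include <immintrin.h>

// Compile with: -mavx512vbmi2 (plus -mavx512vl for 128/256-bit forms).
// Each 64-bit lane: the high 64 bits of concat(hi, lo) << 17.
__m512i funnel_shift_left_17(__m512i hi, __m512i lo) {
  return _mm512_shldi_epi64(hi, lo, 17);  // assumed intrinsic name
}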