summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Support/Host.cpp 3
-rw-r--r-- llvm/lib/Target/X86/X86.td 3
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 4
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.h 6
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td 44
-rw-r--r-- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td 9
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.td 1
-rw-r--r-- llvm/lib/Target/X86/X86IntrinsicsInfo.h 26
-rw-r--r-- llvm/lib/Target/X86/X86Subtarget.cpp 1
-rw-r--r-- llvm/lib/Target/X86/X86Subtarget.h 4
10 files changed, 101 insertions, 0 deletions
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index f80ec6ba72b..c5436f7e228 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -1266,6 +1266,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
// VPCLMULQDQ (carry-less multiplication quadword)
Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
+ // Enable Vector Neural Network Instructions
+ Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
+
bool HasLeafD = MaxLevel >= 0xd &&
!getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 02186937cb4..e0745ec8001 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -160,6 +160,9 @@ def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
[FeatureAVX512]>;
def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
"Enable protection keys">;
+def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
+ "Enable AVX-512 Vector Neural Network Instructions",
+ [FeatureAVX512]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ac4b1d672bb..6aebfb7d52c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25250,6 +25250,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
case X86ISD::LWPINS: return "X86ISD::LWPINS";
case X86ISD::MGATHER: return "X86ISD::MGATHER";
+ case X86ISD::VPDPBUSD: return "X86ISD::VPDPBUSD";
+ case X86ISD::VPDPBUSDS: return "X86ISD::VPDPBUSDS";
+ case X86ISD::VPDPWSSD: return "X86ISD::VPDPWSSD";
+ case X86ISD::VPDPWSSDS: return "X86ISD::VPDPWSSDS";
}
return nullptr;
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 1327cf2c445..b79addfe198 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -481,6 +481,12 @@ namespace llvm {
// op0 x op1 + op2.
VPMADD52L, VPMADD52H,
+ // VNNI
+ VPDPBUSD,
+ VPDPBUSDS,
+ VPDPWSSD,
+ VPDPWSSDS,
+
// FMA nodes.
// We use the target independent ISD::FMA for the non-inverted case.
FNMADD,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 9c6e923b3d3..fa044254cf9 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -10160,3 +10160,47 @@ defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", avx512vl_i8_info,
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", avx512vl_i16_info,
HasVBMI2>, EVEX, VEX_W;
+//===----------------------------------------------------------------------===//
+// VNNI
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in
+multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
+ X86VectorVTInfo VTI> {
+ defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
+ (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
+ "$src3, $src2", "$src2, $src3",
+ (VTI.VT (OpNode VTI.RC:$src1,
+ VTI.RC:$src2, VTI.RC:$src3))>,
+ EVEX_4V, T8PD;
+ defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
+ (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
+ "$src3, $src2", "$src2, $src3",
+ (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
+ (VTI.VT (bitconvert
+ (VTI.LdFrag addr:$src3)))))>,
+ EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD;
+ defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
+ (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
+ OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
+ "$src2, ${src3}"##VTI.BroadcastStr,
+ (OpNode VTI.RC:$src1, VTI.RC:$src2,
+ (VTI.VT (X86VBroadcast
+ (VTI.ScalarLdFrag addr:$src3))))>,
+ EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, T8PD;
+}
+
+multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode> {
+ let Predicates = [HasVNNI] in
+ defm Z : VNNI_rmb<Op, OpStr, OpNode, v16i32_info>, EVEX_V512;
+ let Predicates = [HasVNNI, HasVLX] in {
+ defm Z256 : VNNI_rmb<Op, OpStr, OpNode, v8i32x_info>, EVEX_V256;
+ defm Z128 : VNNI_rmb<Op, OpStr, OpNode, v4i32x_info>, EVEX_V128;
+ }
+}
+
+defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd>;
+defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds>;
+defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd>;
+defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds>;
+
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index c98aa3b9123..263babd09c5 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -533,6 +533,15 @@ def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma, [SDNPCommutative
def X86rsqrt14 : SDNode<"X86ISD::RSQRT14", SDTFPUnaryOp>;
def X86rcp14 : SDNode<"X86ISD::RCP14", SDTFPUnaryOp>;
+
+// VNNI
+def SDTVnni : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
+def X86Vpdpbusd : SDNode<"X86ISD::VPDPBUSD", SDTVnni>;
+def X86Vpdpbusds : SDNode<"X86ISD::VPDPBUSDS", SDTVnni>;
+def X86Vpdpwssd : SDNode<"X86ISD::VPDPWSSD", SDTVnni>;
+def X86Vpdpwssds : SDNode<"X86ISD::VPDPWSSDS", SDTVnni>;
+
def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOpRound>;
def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOpRound>;
def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOpRound>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 97f11a32f46..8fb56b7121b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -832,6 +832,7 @@ def NoVLX : Predicate<"!Subtarget->hasVLX()">;
def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
def PKU : Predicate<"Subtarget->hasPKU()">;
+def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 11f338b878a..bc1a5ec52fc 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1157,6 +1157,19 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
X86ISD::FNMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_128, FMA_OP_MASK, X86ISD::VPDPBUSD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_256, FMA_OP_MASK, X86ISD::VPDPBUSD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_512, FMA_OP_MASK, X86ISD::VPDPBUSD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_128, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_256, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_512, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_128, FMA_OP_MASK, X86ISD::VPDPWSSD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_256, FMA_OP_MASK, X86ISD::VPDPWSSD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_512, FMA_OP_MASK, X86ISD::VPDPWSSD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_128, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
@@ -1377,6 +1390,19 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
X86ISD::FMADDSUB_RND),
+ X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_128, FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_256, FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_512, FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_128, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_256, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_512, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_128, FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_256, FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_512, FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_128, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_256, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_512, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
+
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ,
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index eb73b123a9f..76e7f7bf433 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -325,6 +325,7 @@ void X86Subtarget::initializeEnvironment() {
HasVLX = false;
HasADX = false;
HasPKU = false;
+ HasVNNI = false;
HasSHA = false;
HasPRFCHW = false;
HasRDSEED = false;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index e1711ece0c6..a10b4c07e6d 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -304,6 +304,9 @@ protected:
/// Processor has PKU extenstions
bool HasPKU;
+ /// Processor has AVX-512 Vector Neural Network Instructions
+ bool HasVNNI;
+
/// Processor supports MPX - Memory Protection Extensions
bool HasMPX;
@@ -530,6 +533,7 @@ public:
bool hasBWI() const { return HasBWI; }
bool hasVLX() const { return HasVLX; }
bool hasPKU() const { return HasPKU; }
+ bool hasVNNI() const { return HasVNNI; }
bool hasMPX() const { return HasMPX; }
bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
bool hasCLWB() const { return HasCLWB; }
OpenPOWER on IntegriCloud