diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Support/Host.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86.td | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 44 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 9 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 26 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 4 |
10 files changed, 101 insertions, 0 deletions
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index f80ec6ba72b..c5436f7e228 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1266,6 +1266,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) { // VPCLMULQDQ (carry-less multiplication quadword) Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; + // Enable Vector Neural Network Instructions + Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; + bool HasLeafD = MaxLevel >= 0xd && !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 02186937cb4..e0745ec8001 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -160,6 +160,9 @@ def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true", [FeatureAVX512]>; def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", "Enable protection keys">; +def FeatureVNNI : SubtargetFeature<"avx512vnni", "HasVNNI", "true", + "Enable AVX-512 Vector Neural Network Instructions", + [FeatureAVX512]>; def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ac4b1d672bb..6aebfb7d52c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25250,6 +25250,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND"; case X86ISD::LWPINS: return "X86ISD::LWPINS"; case X86ISD::MGATHER: return "X86ISD::MGATHER"; + case X86ISD::VPDPBUSD: return "X86ISD::VPDPBUSD"; + case X86ISD::VPDPBUSDS: return "X86ISD::VPDPBUSDS"; + case X86ISD::VPDPWSSD: return "X86ISD::VPDPWSSD"; + case X86ISD::VPDPWSSDS: return "X86ISD::VPDPWSSDS"; } return nullptr; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 1327cf2c445..b79addfe198 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -481,6 +481,12 @@ namespace llvm { // op0 x op1 + op2. VPMADD52L, VPMADD52H, + // VNNI + VPDPBUSD, + VPDPBUSDS, + VPDPWSSD, + VPDPWSSDS, + // FMA nodes. // We use the target independent ISD::FMA for the non-inverted case. FNMADD, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 9c6e923b3d3..fa044254cf9 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -10160,3 +10160,47 @@ defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", avx512vl_i8_info, defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W; +//===----------------------------------------------------------------------===// +// VNNI +//===----------------------------------------------------------------------===// + +let Constraints = "$src1 = $dst" in +multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, + X86VectorVTInfo VTI> { + defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), + (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, + "$src3, $src2", "$src2, $src3", + (VTI.VT (OpNode VTI.RC:$src1, + VTI.RC:$src2, VTI.RC:$src3))>, + EVEX_4V, T8PD; + defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), + (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, + "$src3, $src2", "$src2, $src3", + (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, + (VTI.VT (bitconvert + (VTI.LdFrag addr:$src3)))))>, + EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD; + defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), + (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), + OpStr, "${src3}"##VTI.BroadcastStr##", $src2", + "$src2, ${src3}"##VTI.BroadcastStr, + (OpNode VTI.RC:$src1, VTI.RC:$src2, + (VTI.VT (X86VBroadcast + (VTI.ScalarLdFrag addr:$src3))))>, + EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, T8PD; +} + +multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode> { + let Predicates = [HasVNNI] in + defm Z : VNNI_rmb<Op, OpStr, OpNode, v16i32_info>, EVEX_V512; + let Predicates = [HasVNNI, HasVLX] in { + defm Z256 : VNNI_rmb<Op, OpStr, OpNode, v8i32x_info>, EVEX_V256; + defm Z128 : VNNI_rmb<Op, OpStr, OpNode, v4i32x_info>, EVEX_V128; + } +} + +defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd>; +defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds>; +defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd>; +defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds>; + diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index c98aa3b9123..263babd09c5 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -533,6 +533,15 @@ def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma, [SDNPCommutative def X86rsqrt14 : SDNode<"X86ISD::RSQRT14", SDTFPUnaryOp>; def X86rcp14 : SDNode<"X86ISD::RCP14", SDTFPUnaryOp>; + +// VNNI +def SDTVnni : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; +def X86Vpdpbusd : SDNode<"X86ISD::VPDPBUSD", SDTVnni>; +def X86Vpdpbusds : SDNode<"X86ISD::VPDPBUSDS", SDTVnni>; +def X86Vpdpwssd : SDNode<"X86ISD::VPDPWSSD", SDTVnni>; +def X86Vpdpwssds : SDNode<"X86ISD::VPDPWSSDS", SDTVnni>; + def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOpRound>; def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOpRound>; def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOpRound>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 97f11a32f46..8fb56b7121b 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -832,6 +832,7 @@ def NoVLX : Predicate<"!Subtarget->hasVLX()">; def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; def PKU : Predicate<"Subtarget->hasPKU()">; +def HasVNNI : Predicate<"Subtarget->hasVNNI()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 11f338b878a..bc1a5ec52fc 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1157,6 +1157,19 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB, X86ISD::FNMSUB_RND), + X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_128, FMA_OP_MASK, X86ISD::VPDPBUSD, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_256, FMA_OP_MASK, X86ISD::VPDPBUSD, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_512, FMA_OP_MASK, X86ISD::VPDPBUSD, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_128, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_256, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_512, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_128, FMA_OP_MASK, X86ISD::VPDPWSSD, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_256, FMA_OP_MASK, X86ISD::VPDPWSSD, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_512, FMA_OP_MASK, X86ISD::VPDPWSSD, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_128, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), + X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK, X86ISD::VPERMIV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK, @@ -1377,6 +1390,19 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB, X86ISD::FMADDSUB_RND), + X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_128, FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_256, FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_512, FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_128, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_256, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_512, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_128, FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_256, FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_512, FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_128, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_256, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_512, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0), + X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ, X86ISD::VPERMV3, 0), X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ, diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index eb73b123a9f..76e7f7bf433 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -325,6 +325,7 @@ void X86Subtarget::initializeEnvironment() { HasVLX = false; HasADX = false; HasPKU = false; + HasVNNI = false; HasSHA = false; HasPRFCHW = false; HasRDSEED = false; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index e1711ece0c6..a10b4c07e6d 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -304,6 +304,9 @@ protected: /// Processor has PKU extenstions bool HasPKU; + /// Processor has AVX-512 Vector Neural Network Instructions + bool HasVNNI; + /// Processor supports MPX - Memory Protection Extensions bool HasMPX; @@ -530,6 +533,7 @@ public: bool hasBWI() const { return HasBWI; } bool hasVLX() const { return HasVLX; } bool hasPKU() const { return HasPKU; } + bool hasVNNI() const { return HasVNNI; } bool hasMPX() const { return HasMPX; } bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; } bool hasCLWB() const { return HasCLWB; } |

