diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Support/Host.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86.td | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 52 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 79 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 22 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 4 | 
11 files changed, 175 insertions, 3 deletions
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 31f86eb3fec..3ce636ffcda 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1217,6 +1217,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {    Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;    Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);    Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save; +  Features["gfni"]            = HasLeaf7 && ((ECX >>  8) & 1);    Features["vaes"]            = HasLeaf7 && ((ECX >>  9) & 1) && HasAVXSave;    Features["vpclmulqdq"]      = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;    Features["avx512vnni"]      = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 56a6d57c195..8c1136341de 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -169,6 +169,9 @@ def FeatureBITALG  : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",  def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",                           "Enable packed carry-less multiplication instructions",                                 [FeatureSSE2]>; +def FeatureGFNI    : SubtargetFeature<"gfni", "HasGFNI", "true", +                         "Enable Galois Field Arithmetic Instructions", +                               [FeatureSSE2]>;  def FeatureVPCLMULQDQ : SubtargetFeature<"vpclmulqdq", "HasVPCLMULQDQ", "true",                                           "Enable vpclmulqdq instructions",                                           [FeatureAVX, FeaturePCLMUL]>; @@ -698,8 +701,8 @@ def ICLFeatures : ProcessorFeatures<CNLFeatures.Value, [    FeatureVBMI2,    FeatureVNNI,    FeatureVPCLMULQDQ, -  FeatureVPOPCNTDQ -  // TODO: Add GFNI when it is implemented. +  FeatureVPOPCNTDQ, +  FeatureGFNI  ]>;  class IcelakeProc<string Name> : ProcModel<Name, SkylakeServerModel, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2163efd30aa..892c7e24abd 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -25254,6 +25254,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {    case X86ISD::VPDPWSSD:           return "X86ISD::VPDPWSSD";    case X86ISD::VPDPWSSDS:          return "X86ISD::VPDPWSSDS";    case X86ISD::VPSHUFBITQMB:       return "X86ISD::VPSHUFBITQMB"; +  case X86ISD::GF2P8MULB:          return "X86ISD::GF2P8MULB"; +  case X86ISD::GF2P8AFFINEQB:      return "X86ISD::GF2P8AFFINEQB"; +  case X86ISD::GF2P8AFFINEINVQB:   return "X86ISD::GF2P8AFFINEINVQB";    }    return nullptr;  } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 61b03be52a9..90830f4d5d1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -587,6 +587,9 @@ namespace llvm {        // Conversions between float and half-float.        CVTPS2PH, CVTPH2PS, CVTPH2PS_RND, +      // Galois Field Arithmetic Instructions +      GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB, +        // LWP insert record.        LWPINS, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 626ad00933c..1f2e7197ba7 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -10242,3 +10242,55 @@ multiclass VPSHUFBITQMB_common<AVX512VLVectorVTInfo VTI> {  defm VPSHUFBITQMB : VPSHUFBITQMB_common<avx512vl_i8_info>; +//===----------------------------------------------------------------------===// +// GFNI +//===----------------------------------------------------------------------===// + +multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> { +  let Predicates = [HasGFNI, HasAVX512, HasBWI] in +  defm Z      : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, +                                SSE_INTALU_ITINS_P, 1>, EVEX_V512; +  let Predicates = [HasGFNI, HasVLX, HasBWI] in { +    defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, +                                SSE_INTALU_ITINS_P, 1>, EVEX_V256; +    defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, +                                SSE_INTALU_ITINS_P, 1>, EVEX_V128; +  } +} + +defm GF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>, +                 EVEX_CD8<8, CD8VF>, T8PD; + +multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, +                                      X86VectorVTInfo VTI, +                                      X86VectorVTInfo BcstVTI> +           : avx512_3Op_rm_imm8<Op, OpStr, OpNode, VTI, VTI> { +  let ExeDomain = VTI.ExeDomain in +  defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), +                (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3), +                OpStr, "$src3, ${src2}"##BcstVTI.BroadcastStr##", $src1", +                "$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3", +                (OpNode (VTI.VT VTI.RC:$src1), +                 (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))), +                 (i8 imm:$src3))>, EVEX_B; +} + +multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> { +  let Predicates = [HasGFNI, HasAVX512, HasBWI] in +  defm Z      : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, v64i8_info, +                                           v8i64_info>, EVEX_V512; +  let Predicates = [HasGFNI, HasVLX, HasBWI] in { +    defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, v32i8x_info, +                                           v4i64x_info>, EVEX_V256; +    defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, v16i8x_info, +                                           v2i64x_info>, EVEX_V128; +  } +} + +defm GF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", +                                                  X86GF2P8affineinvqb>, +                        EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; +defm GF2P8AFFINEQB    : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", +                                                  X86GF2P8affineqb>, +                        EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; + diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index b013d66a21d..cb27fcce349 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -672,6 +672,11 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",  def X86cvt2mask   : SDNode<"X86ISD::CVT2MASK", SDTIntTruncOp>; +// galois field arithmetic +def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>; +def X86GF2P8affineqb    : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>; +def X86GF2P8mulb        : SDNode<"X86ISD::GF2P8MULB", SDTIntBinOp>; +  //===----------------------------------------------------------------------===//  // SSE Complex Patterns  //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 97d3e6dfb44..a790d1a4141 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -848,6 +848,7 @@ def HasPCLMUL    : Predicate<"Subtarget->hasPCLMUL()">;  def NoVLX_Or_NoVPCLMULQDQ :                      Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVPCLMULQDQ()">;  def HasVPCLMULQDQ : Predicate<"Subtarget->hasVPCLMULQDQ()">; +def HasGFNI      : Predicate<"Subtarget->hasGFNI()">;  def HasFMA       : Predicate<"Subtarget->hasFMA()">;  def HasFMA4      : Predicate<"Subtarget->hasFMA4()">;  def NoFMA4       : Predicate<"!Subtarget->hasFMA4()">; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index dc52f867dd5..03da8c3665d 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8466,3 +8466,82 @@ def : Pat<(xor FR128:$src1, FR128:$src2),            (COPY_TO_REGCLASS             (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),                      (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +//===----------------------------------------------------------------------===// +// GFNI instructions +//===----------------------------------------------------------------------===// + +multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT, +                        RegisterClass RC, PatFrag MemOpFrag, +                        X86MemOperand X86MemOp, bit Is2Addr = 0> { +  let ExeDomain = SSEPackedInt, +      AsmString = !if(Is2Addr, +        OpcodeStr##"\t{$src2, $dst|$dst, $src2}", +        OpcodeStr##"\t{$src2, $src1, $dst|$dst, $src1, $src2}") in { +    let isCommutable = 1 in +    def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "", +                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))], +                 SSE_INTALU_ITINS_P.rr>, +             Sched<[SSE_INTALU_ITINS_P.Sched]>, T8PD; + +    def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "", +                 [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, +                                 (bitconvert (MemOpFrag addr:$src2)))))], +                 SSE_INTALU_ITINS_P.rm>, +             Sched<[SSE_INTALU_ITINS_P.Sched.Folded, ReadAfterLd]>, T8PD; +  } +} + +multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT, +                           SDNode OpNode, RegisterClass RC, PatFrag MemOpFrag, +                           X86MemOperand X86MemOp, bit Is2Addr = 0> { +  let AsmString = !if(Is2Addr, +      OpStr##"\t{$src3, $src2, $dst|$dst, $src2, $src3}", +      OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in { +  def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst), +              (ins RC:$src1, RC:$src2, u8imm:$src3), "", +              [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))], +              SSE_INTALU_ITINS_P.rr, SSEPackedInt>, +              Sched<[WriteVecALU]>; +  def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst), +              (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "", +              [(set RC:$dst, (OpVT (OpNode RC:$src1, +                                    (bitconvert (MemOpFrag addr:$src2)), +                              imm:$src3)))], +              SSE_INTALU_ITINS_P.rm, SSEPackedInt>, +              Sched<[WriteVecALU.Folded, ReadAfterLd]>; +  } +} + +multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> { +  let Constraints = "$src1 = $dst", +      Predicates  = [HasGFNI, UseSSE2] in +  defm NAME         : GF2P8AFFINE_rmi<Op, OpStr, v16i8, OpNode, +                                      VR128, loadv2i64, i128mem, 1>; +  let Predicates  = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in { +    defm V##NAME    : GF2P8AFFINE_rmi<Op, "v"##OpStr, v16i8, OpNode, VR128, +                                      loadv2i64, i128mem>, VEX_4V, VEX_W; +    defm V##NAME##Y : GF2P8AFFINE_rmi<Op, "v"##OpStr, v32i8, OpNode, VR256, +                                      loadv4i64, i256mem>, VEX_4V, VEX_L, VEX_W; +  } +} + +// GF2P8MULB +let Constraints = "$src1 = $dst", +    Predicates  = [HasGFNI, UseSSE2] in +defm GF2P8MULB      : GF2P8MULB_rm<"gf2p8mulb", v16i8, VR128, memopv2i64, +                                    i128mem, 1>; +let Predicates  = [HasGFNI, HasAVX, NoVLX_Or_NoBWI] in { +  defm VGF2P8MULB   : GF2P8MULB_rm<"vgf2p8mulb", v16i8, VR128, loadv2i64, +                                   i128mem>, VEX_4V; +  defm VGF2P8MULBY  : GF2P8MULB_rm<"vgf2p8mulb", v32i8, VR256, loadv4i64, +                                   i256mem>, VEX_4V, VEX_L; +} +// GF2P8AFFINEINVQB, GF2P8AFFINEQB +let isCommutable = 0 in { +  defm GF2P8AFFINEINVQB : GF2P8AFFINE_common<0xCF, "gf2p8affineinvqb", +                                             X86GF2P8affineinvqb>, TAPD; +  defm GF2P8AFFINEQB    : GF2P8AFFINE_common<0xCE, "gf2p8affineqb", +                                             X86GF2P8affineqb>, TAPD; +} + diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 598994d07ad..fae0889950b 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1170,7 +1170,7 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {    X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),    X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), -  X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK, + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,                      X86ISD::VPERMIV3, 0),    X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,                      X86ISD::VPERMIV3, 0), @@ -1700,6 +1700,26 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {    X86_INTRINSIC_DATA(vcvtph2ps_256,     INTR_TYPE_1OP, X86ISD::CVTPH2PS, 0),    X86_INTRINSIC_DATA(vcvtps2ph_128,     INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0),    X86_INTRINSIC_DATA(vcvtps2ph_256,     INTR_TYPE_2OP, X86ISD::CVTPS2PH, 0), + +  X86_INTRINSIC_DATA(vgf2p8affineinvqb_128, INTR_TYPE_3OP, +                     X86ISD::GF2P8AFFINEINVQB, 0), +  X86_INTRINSIC_DATA(vgf2p8affineinvqb_256, INTR_TYPE_3OP, +                     X86ISD::GF2P8AFFINEINVQB, 0), +  X86_INTRINSIC_DATA(vgf2p8affineinvqb_512, INTR_TYPE_3OP, +                     X86ISD::GF2P8AFFINEINVQB, 0), +  X86_INTRINSIC_DATA(vgf2p8affineqb_128, INTR_TYPE_3OP, +                     X86ISD::GF2P8AFFINEQB, 0), +  X86_INTRINSIC_DATA(vgf2p8affineqb_256, INTR_TYPE_3OP, +                     X86ISD::GF2P8AFFINEQB, 0), +  X86_INTRINSIC_DATA(vgf2p8affineqb_512, INTR_TYPE_3OP, +                     X86ISD::GF2P8AFFINEQB, 0), +  X86_INTRINSIC_DATA(vgf2p8mulb_128, INTR_TYPE_2OP, +                     X86ISD::GF2P8MULB, 0), +  X86_INTRINSIC_DATA(vgf2p8mulb_256, INTR_TYPE_2OP, +                     X86ISD::GF2P8MULB, 0), +  X86_INTRINSIC_DATA(vgf2p8mulb_512, INTR_TYPE_2OP, +                     X86ISD::GF2P8MULB, 0), +    X86_INTRINSIC_DATA(xop_vpcomb,        INTR_TYPE_3OP, X86ISD::VPCOM, 0),    X86_INTRINSIC_DATA(xop_vpcomd,        INTR_TYPE_3OP, X86ISD::VPCOM, 0),    X86_INTRINSIC_DATA(xop_vpcomq,        INTR_TYPE_3OP, X86ISD::VPCOM, 0), diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index 0f995404618..72c08e21799 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -299,6 +299,7 @@ void X86Subtarget::initializeEnvironment() {    HasXSAVES = false;    HasPCLMUL = false;    HasVPCLMULQDQ = false; +  HasGFNI = false;    HasFMA = false;    HasFMA4 = false;    HasXOP = false; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 50e1a742a0f..740b9ddba09 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -128,6 +128,9 @@ protected:    bool HasPCLMUL;    bool HasVPCLMULQDQ; +  /// Target has Galois Field Arithmetic instructions +  bool HasGFNI; +    /// Target has 3-operand fused multiply-add    bool HasFMA; @@ -480,6 +483,7 @@ public:    bool hasXSAVES() const { return HasXSAVES; }    bool hasPCLMUL() const { return HasPCLMUL; }    bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; } +  bool hasGFNI() const { return HasGFNI; }    // Prefer FMA4 to FMA - its better for commutation/memory folding and    // has equal or better performance on all supported targets.    bool hasFMA() const { return HasFMA; }  | 

