summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorAsaf Badouh <asaf.badouh@intel.com>2016-01-25 11:14:24 +0000
committerAsaf Badouh <asaf.badouh@intel.com>2016-01-25 11:14:24 +0000
commit655822ab7ec7c5acb45f2bf18662b6cd603b0397 (patch)
treef15c093a7b0b255dd2561514667e2c6b82ce0889 /llvm/lib
parent54f81ed325f05b46f5bc0b63524d72103437b795 (diff)
downloadbcm5719-llvm-655822ab7ec7c5acb45f2bf18662b6cd603b0397.tar.gz
bcm5719-llvm-655822ab7ec7c5acb45f2bf18662b6cd603b0397.zip
[X86][IFMA] Add intrinsics and encodings for multiply-and-add of unsigned 52-bit integers
VPMADD52LUQ - Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit Products to Qword Accumulators; VPMADD52HUQ - Packed Multiply of Unsigned 52-bit Unsigned Integers and Add High 52-bit Products to 64-bit Accumulators. Differential Revision: http://reviews.llvm.org/D16407 (llvm-svn: 258680)
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp2
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h1
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td49
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td3
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h24
5 files changed, 79 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 813b54f5735..6d104e9bf72 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -21063,6 +21063,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND";
case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND";
case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND";
+ case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H";
+ case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L";
case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
case X86ISD::VREDUCE: return "X86ISD::VREDUCE";
case X86ISD::VGETMANT: return "X86ISD::VGETMANT";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index def395896b1..3f87a4926b7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -441,6 +441,7 @@ namespace llvm {
MULHRS,
// Multiply and Add Packed Integers
VPMADDUBSW, VPMADDWD,
+ VPMADD52L, VPMADD52H,
// FMA nodes
FMADD,
FNMADD,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 96f41695ff1..9a7e34aa7b8 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4698,6 +4698,55 @@ defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
//===----------------------------------------------------------------------===//
+// AVX-512 IFMA - Packed Multiply of Unsigned 52-bit Integers and Add (Low/High)
+//===----------------------------------------------------------------------===//
+let Constraints = "$src1 = $dst" in { // $src1 is tied to $dst for every form defined below
+multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> { // emits the r, m and mb forms of one opcode for the vector type described by `_`
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), // register form: $src3 is a register
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ AVX512FMA3Base;
+
+ let mayLoad = 1 in { // both memory forms below are marked mayLoad
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), // full-vector memory form: $src3 is read through _.LdFrag
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
+ AVX512FMA3Base;
+
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), // broadcast memory form: $src3 is a scalar memory operand
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), // asm strings append _.BroadcastStr to the memory operand
+ !strconcat("$src2, ${src3}", _.BroadcastStr ),
+ (OpNode _.RC:$src1,
+ _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>, // scalar load wrapped in X86VBroadcast
+ AVX512FMA3Base, EVEX_B; // only the broadcast form carries EVEX_B
+ }
+}
+} // Constraints = "$src1 = $dst"
+
+multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo _> { // instantiates avx512_pmadd52_rm once per vector width in `_`
+ let Predicates = [HasIFMA] in { // 512-bit variant needs HasIFMA only
+ defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info512>,
+ EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ }
+ let Predicates = [HasVLX, HasIFMA] in { // 256- and 128-bit variants additionally require HasVLX
+ defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info256>,
+ EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
+ defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, _.info128>,
+ EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
+ }
+}
+
+defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l, // opcode 0xb4, matched from the x86vpmadd52l node
+ avx512vl_i64_info>, VEX_W;
+defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h, // opcode 0xb5, matched from the x86vpmadd52h node
+ avx512vl_i64_info>, VEX_W;
+
+//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from sign integer to float/double
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 176cf56ebf8..92bc65c5155 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -471,6 +471,9 @@ def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound>;
def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound>;
def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound>;
+def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTFma>; // OpNode passed to the VPMADD52LUQ instruction definitions
+def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTFma>; // OpNode passed to the VPMADD52HUQ instruction definitions
+
def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>;
def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>;
def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 0adb49b2a52..4d6f7c87910 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1891,6 +1891,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_qi_512, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpmadd52h_uq_128 , FMA_OP_MASK,
+ X86ISD::VPMADD52H, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpmadd52h_uq_256 , FMA_OP_MASK,
+ X86ISD::VPMADD52H, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpmadd52h_uq_512 , FMA_OP_MASK,
+ X86ISD::VPMADD52H, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpmadd52l_uq_128 , FMA_OP_MASK,
+ X86ISD::VPMADD52L, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpmadd52l_uq_256 , FMA_OP_MASK,
+ X86ISD::VPMADD52L, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vpmadd52l_uq_512 , FMA_OP_MASK,
+ X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
@@ -1979,6 +1991,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_qi_512, VPERM_3OP_MASKZ,
X86ISD::VPERMV3, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpmadd52h_uq_128, FMA_OP_MASKZ,
+ X86ISD::VPMADD52H, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpmadd52h_uq_256, FMA_OP_MASKZ,
+ X86ISD::VPMADD52H, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpmadd52h_uq_512, FMA_OP_MASKZ,
+ X86ISD::VPMADD52H, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_128, FMA_OP_MASKZ,
+ X86ISD::VPMADD52L, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_256, FMA_OP_MASKZ,
+ X86ISD::VPMADD52L, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_512, FMA_OP_MASKZ,
+ X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_pbroadcastb_128, INTR_TYPE_1OP_MASK,
X86ISD::VBROADCAST, 0),
X86_INTRINSIC_DATA(avx512_pbroadcastb_256, INTR_TYPE_1OP_MASK,
OpenPOWER on IntegriCloud