summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-10-27 15:27:00 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-10-27 15:27:00 +0000
commit: 820e1326d726f8219b0de369f8dc14c666197c17 (patch)
tree: 726505c42b9e22c99ffe2bd44eef3796a7ac61ec /llvm/lib
parent: e372aecb8ac914133c960bd8ea5c52c5fdebe81d (diff)
download: bcm5719-llvm-820e1326d726f8219b0de369f8dc14c666197c17.tar.gz
          bcm5719-llvm-820e1326d726f8219b0de369f8dc14c666197c17.zip
[X86][AVX512DQ] Improve lowering of MUL v2i64 and v4i64
With DQI but without VLX, lower v2i64 and v4i64 MUL operations with v8i64 MUL (vpmullq). Updated cost table accordingly.

Differential Revision: https://reviews.llvm.org/D26011

llvm-svn: 285304
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 19
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 13
2 files changed, 32 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5f1bf7096fa..bd378cfd232 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19854,6 +19854,25 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
"Only know how to lower V2I64/V4I64/V8I64 multiply");
+ // AVX512DQ - extend to 512 bit vector.
+ // FIXME: This can possibly be converted to a tablegen pattern.
+ if (Subtarget.hasDQI()) {
+ assert(!Subtarget.hasVLX() && "AVX512DQVL vXi64 multiply is legal");
+ assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
+ "AVX512DQ v8i64 multiply is legal");
+
+ MVT NewVT = MVT::getVectorVT(MVT::i64, 512 / VT.getScalarSizeInBits());
+ SDValue A512 =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT, DAG.getUNDEF(NewVT), A,
+ DAG.getIntPtrConstant(0, dl));
+ SDValue B512 =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT, DAG.getUNDEF(NewVT), B,
+ DAG.getIntPtrConstant(0, dl));
+ SDValue MulNode = DAG.getNode(ISD::MUL, dl, NewVT, A512, B512);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, MulNode,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
//
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index a2cc73addf4..f8d3a04c367 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -204,6 +204,19 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
}
+ static const CostTblEntry AVX512DQCostTable[] = {
+ { ISD::MUL, MVT::v2i64, 1 },
+ { ISD::MUL, MVT::v4i64, 1 },
+ { ISD::MUL, MVT::v8i64, 1 }
+ };
+
+ // Look for AVX512DQ lowering tricks for custom cases.
+ if (ST->hasDQI()) {
+ if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
+ }
+
static const CostTblEntry AVX512BWCostTable[] = {
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
{ ISD::SDIV, MVT::v64i8, 64*20 },
OpenPOWER on IntegriCloud