summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-10-20 18:00:35 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-10-20 18:00:35 +0000
commit365be4f95ccf9ab1cf217609678159acbe6b01a5 (patch)
treedfe49b348986033c18ab25512672fe7dc6c64d8d /llvm/lib
parent493091fdefc28ee4f64ae3640771237a0368ba72 (diff)
downloadbcm5719-llvm-365be4f95ccf9ab1cf217609678159acbe6b01a5.tar.gz
bcm5719-llvm-365be4f95ccf9ab1cf217609678159acbe6b01a5.zip
[CostModel][X86] Fixed AVX1/AVX512 sdiv/udiv uniformconst costs for 256/512 bit integer vectors
We weren't checking for uniform const costs before the general cost, resulting in very high estimates. llvm-svn: 284755
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp65
1 files changed, 48 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 11bce7c46f5..e14220807c8 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -140,6 +140,30 @@ int X86TTIImpl::getArithmeticInstrCost(
return Cost;
}
+ static const CostTblEntry AVX512BWUniformConstCostTable[] = {
+ { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
+ { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
+ };
+
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasBWI()) {
+ if (const auto *Entry = CostTableLookup(AVX512BWUniformConstCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX512UniformConstCostTable[] = {
+ { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
+ { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
+ };
+
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasAVX512()) {
+ if (const auto *Entry = CostTableLookup(AVX512UniformConstCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
+ }
+
static const CostTblEntry AVX2UniformConstCostTable[] = {
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
@@ -156,6 +180,30 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
}
+ static const CostTblEntry SSE2UniformConstCostTable[] = {
+ { ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence
+ { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
+ { ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence
+ { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
+ { ISD::SDIV, MVT::v8i32, 38 }, // pmuludq sequence
+ { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence
+ { ISD::UDIV, MVT::v8i32, 30 }, // pmuludq sequence
+ { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
+ };
+
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasSSE2()) {
+ // pmuldq sequence.
+ if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX())
+ return LT.first * 30;
+ if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
+ return LT.first * 15;
+
+ if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
+ }
+
static const CostTblEntry AVX512BWCostTable[] = {
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
{ ISD::SDIV, MVT::v64i8, 64*20 },
@@ -292,15 +340,6 @@ int X86TTIImpl::getArithmeticInstrCost(
}
static const CostTblEntry
- SSE2UniformConstCostTable[] = {
- // Constant splats are cheaper for the following instructions.
- { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
- { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
- { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence
- { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
- };
-
- static const CostTblEntry
SSE2UniformCostTable[] = {
// Uniform splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
@@ -334,14 +373,6 @@ int X86TTIImpl::getArithmeticInstrCost(
if (ST->hasSSE2() &&
((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
(Op2Info == TargetTransformInfo::OK_UniformValue))) {
- if (Op2Info == TargetTransformInfo::OK_UniformConstantValue) {
- // pmuldq sequence.
- if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
- return LT.first * 15;
- if (const auto *Entry =
- CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
- return LT.first * Entry->Cost;
- }
if (const auto *Entry =
CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
OpenPOWER on IntegriCloud