author     Simon Pilgrim <llvm-dev@redking.me.uk>   2018-10-24 17:30:29 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>   2018-10-24 17:30:29 +0000
commit     2cce074e8c39b94ba6d224ae356fb2ec33e7fdcd (patch)
tree       f3cd4ab3332b23e7a5aa940a76b89c04e7c560f8 /llvm/lib
parent     18bfb3a5ec410d4675eb1a7c913dfeb60017df3f (diff)
[CostModel][X86] Enable non-uniform vector division by constants costs.
Non-uniform division/remainder handling was added back in D49248/D50765, so share the 'mul+sub' costs that already exist for the uniform cases.

llvm-svn: 345164
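As a rough, hypothetical illustration of what a "non-uniform constant" divisor means here (the divisor values are made up, not taken from the patch): if the four divisions below are vectorized into a single 4-wide operation, the divisor becomes the constant vector <3, 5, 7, 9>, which is constant but differs per lane, so its operand kind is OK_NonUniformConstantValue rather than OK_UniformConstantValue. Before this change such divisions fell through to the generic scalarized cost instead of the multiply-based sequences priced in the tables below.

#include <cstdint>

// Hypothetical example loop; after 4-wide vectorization the four scalar
// divisions become one 'udiv <4 x i32>' by the non-uniform constant
// vector <3, 5, 7, 9>.
void scale(uint32_t *out, const uint32_t *in, int n) {
  for (int i = 0; i + 3 < n; i += 4) {
    out[i + 0] = in[i + 0] / 3;
    out[i + 1] = in[i + 1] / 5;
    out[i + 2] = in[i + 2] / 7;
    out[i + 3] = in[i + 3] / 9;
  }
}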
Diffstat (limited to 'llvm/lib')
-rw-r--r--   llvm/lib/Target/X86/X86TargetTransformInfo.cpp   88
1 file changed, 62 insertions(+), 26 deletions(-)
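The 'pmulhw sequence' and '+mul+sub sequence' comments in the cost tables below refer to the usual expansion of division by a constant into a multiply-high plus fix-up shifts, with the remainder then recovered from the quotient by one extra multiply and subtract. A minimal scalar sketch (the helper name is hypothetical, not code from the patch) of why each SREM/UREM entry is priced a few units above the matching SDIV/UDIV entry:

#include <cstdint>

// Stand-in for the pmulhuw/pmuludq-based expansion of x / c that the
// SDIV/UDIV table entries price.
static uint32_t udiv_by_const(uint32_t x, uint32_t c) { return x / c; }

// The remainder reuses that quotient and adds one multiply and one
// subtract, which is the "+mul+sub" part of the SREM/UREM entries.
static uint32_t urem_by_const(uint32_t x, uint32_t c) {
  uint32_t q = udiv_by_const(x, c);
  return x - q * c;
}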
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index ffc5a029040..29306d75454 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -290,11 +290,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v64i8, 2 }, // psllw + pand.
{ ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand.
{ ISD::SRA, MVT::v64i8, 4 }, // psrlw, pand, pxor, psubb.
-
- { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
- { ISD::SREM, MVT::v32i16, 8 }, // vpmulhw+mul+sub sequence
- { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
- { ISD::UREM, MVT::v32i16, 8 }, // vpmulhuw+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -308,11 +303,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v2i64, 1 },
{ ISD::SRA, MVT::v4i64, 1 },
{ ISD::SRA, MVT::v8i64, 1 },
-
- { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
- { ISD::SREM, MVT::v16i32, 17 }, // vpmuldq+mul+sub sequence
- { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
- { ISD::UREM, MVT::v16i32, 17 }, // vpmuludq+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -328,15 +318,6 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v32i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
-
- { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
- { ISD::SREM, MVT::v16i16, 8 }, // vpmulhw+mul+sub sequence
- { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
- { ISD::UREM, MVT::v16i16, 8 }, // vpmulhuw+mul+sub sequence
- { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
- { ISD::SREM, MVT::v8i32, 19 }, // vpmuldq+mul+sub sequence
- { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
- { ISD::UREM, MVT::v8i32, 19 }, // vpmuludq+mul+sub sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
@@ -354,7 +335,65 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SHL, MVT::v32i8, 4+2 }, // 2*(psllw + pand) + split.
{ ISD::SRL, MVT::v32i8, 4+2 }, // 2*(psrlw + pand) + split.
{ ISD::SRA, MVT::v32i8, 8+2 }, // 2*(psrlw, pand, pxor, psubb) + split.
+ };
+ // XOP has faster vXi8 shifts.
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasSSE2() && !ST->hasXOP()) {
+ if (const auto *Entry =
+ CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX512BWConstCostTable[] = {
+ { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
+ { ISD::SREM, MVT::v32i16, 8 }, // vpmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
+ { ISD::UREM, MVT::v32i16, 8 }, // vpmulhuw+mul+sub sequence
+ };
+
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ ST->hasBWI()) {
+ if (const auto *Entry =
+ CostTableLookup(AVX512BWConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX512ConstCostTable[] = {
+ { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
+ { ISD::SREM, MVT::v16i32, 17 }, // vpmuldq+mul+sub sequence
+ { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
+ { ISD::UREM, MVT::v16i32, 17 }, // vpmuludq+mul+sub sequence
+ };
+
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ ST->hasAVX512()) {
+ if (const auto *Entry =
+ CostTableLookup(AVX512ConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX2ConstCostTable[] = {
+ { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
+ { ISD::SREM, MVT::v16i16, 8 }, // vpmulhw+mul+sub sequence
+ { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
+ { ISD::UREM, MVT::v16i16, 8 }, // vpmulhuw+mul+sub sequence
+ { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
+ { ISD::SREM, MVT::v8i32, 19 }, // vpmuldq+mul+sub sequence
+ { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
+ { ISD::UREM, MVT::v8i32, 19 }, // vpmuludq+mul+sub sequence
+ };
+
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ ST->hasAVX2()) {
+ if (const auto *Entry = CostTableLookup(AVX2ConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry SSE2ConstCostTable[] = {
{ ISD::SDIV, MVT::v16i16, 12+2 }, // 2*pmulhw sequence + split.
{ ISD::SREM, MVT::v16i16, 16+2 }, // 2*pmulhw+mul+sub sequence + split.
{ ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
@@ -373,7 +412,8 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::UREM, MVT::v4i32, 20 }, // pmuludq+mul+sub sequence
};
- if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
ST->hasSSE2()) {
// pmuldq sequence.
if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX())
@@ -385,12 +425,8 @@ int X86TTIImpl::getArithmeticInstrCost(
if (ISD == ISD::SREM && LT.second == MVT::v4i32 && ST->hasSSE41())
return LT.first * 20;
- // XOP has faster vXi8 shifts.
- if ((ISD != ISD::SHL && ISD != ISD::SRL && ISD != ISD::SRA) ||
- !ST->hasXOP())
- if (const auto *Entry =
- CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second))
- return LT.first * Entry->Cost;
+ if (const auto *Entry = CostTableLookup(SSE2ConstCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
}
static const CostTblEntry AVX2UniformCostTable[] = {