diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 65 |
1 file changed, 48 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 11bce7c46f5..e14220807c8 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -140,6 +140,30 @@ int X86TTIImpl::getArithmeticInstrCost( return Cost; } + static const CostTblEntry AVX512BWUniformConstCostTable[] = { + { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence + { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence + }; + + if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && + ST->hasBWI()) { + if (const auto *Entry = CostTableLookup(AVX512BWUniformConstCostTable, ISD, + LT.second)) + return LT.first * Entry->Cost; + } + + static const CostTblEntry AVX512UniformConstCostTable[] = { + { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence + { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence + }; + + if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && + ST->hasAVX512()) { + if (const auto *Entry = CostTableLookup(AVX512UniformConstCostTable, ISD, + LT.second)) + return LT.first * Entry->Cost; + } + static const CostTblEntry AVX2UniformConstCostTable[] = { { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle. @@ -156,6 +180,30 @@ int X86TTIImpl::getArithmeticInstrCost( return LT.first * Entry->Cost; } + static const CostTblEntry SSE2UniformConstCostTable[] = { + { ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence + { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence + { ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence + { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence + { ISD::SDIV, MVT::v8i32, 38 }, // pmuludq sequence + { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence + { ISD::UDIV, MVT::v8i32, 30 }, // pmuludq sequence + { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence + }; + + if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && + ST->hasSSE2()) { + // pmuldq sequence. 
+ if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX()) + return LT.first * 30; + if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41()) + return LT.first * 15; + + if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD, + LT.second)) + return LT.first * Entry->Cost; + } + static const CostTblEntry AVX512BWCostTable[] = { // Vectorizing division is a bad idea. See the SSE2 table for more comments. { ISD::SDIV, MVT::v64i8, 64*20 }, @@ -292,15 +340,6 @@ int X86TTIImpl::getArithmeticInstrCost( } static const CostTblEntry - SSE2UniformConstCostTable[] = { - // Constant splats are cheaper for the following instructions. - { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence - { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence - { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence - { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence - }; - - static const CostTblEntry SSE2UniformCostTable[] = { // Uniform splats are cheaper for the following instructions. { ISD::SHL, MVT::v16i8, 1 }, // psllw. @@ -334,14 +373,6 @@ int X86TTIImpl::getArithmeticInstrCost( if (ST->hasSSE2() && ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || (Op2Info == TargetTransformInfo::OK_UniformValue))) { - if (Op2Info == TargetTransformInfo::OK_UniformConstantValue) { - // pmuldq sequence. - if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41()) - return LT.first * 15; - if (const auto *Entry = - CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second)) - return LT.first * Entry->Cost; - } if (const auto *Entry = CostTableLookup(SSE2UniformCostTable, ISD, LT.second)) return LT.first * Entry->Cost; |