diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2017-09-08 13:49:36 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2017-09-08 13:49:36 +0000 |
commit | 6dd29fccb881cd3f766effe22a0732d7f02b37fe (patch) | |
tree | 0b6aaca1b921fa53193282087296a8fe9bf54d9f /llvm/lib/Target/X86/X86TargetTransformInfo.cpp | |
parent | 46dfb7a39d7eb7e7b2439494ad18e31056aa6d09 (diff) | |
download | bcm5719-llvm-6dd29fccb881cd3f766effe22a0732d7f02b37fe.tar.gz bcm5719-llvm-6dd29fccb881cd3f766effe22a0732d7f02b37fe.zip |
[SLP] Support for horizontal min/max reduction.
SLP vectorizer supports horizontal reductions for Add/FAdd binary
operations. Patch adds support for horizontal min/max reductions.
Function getReductionCost() is split to getArithmeticReductionCost() for
binary operation reductions and getMinMaxReductionCost() for min/max
reductions.
Patch fixes PR26956.
Differential revision: https://reviews.llvm.org/D27846
llvm-svn: 312791
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 146 |
1 files changed, 146 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 871a38d0014..79f192ce062 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1999,6 +1999,152 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy, return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise); } +int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy, + bool IsPairwise, bool IsUnsigned) { + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); + + MVT MTy = LT.second; + + int ISD; + if (ValTy->isIntOrIntVectorTy()) { + ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN; + } else { + assert(ValTy->isFPOrFPVectorTy() && + "Expected float point or integer vector type."); + ISD = ISD::FMINNUM; + } + + // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput + // and make it as the cost. + + static const CostTblEntry SSE42CostTblPairWise[] = { + {ISD::FMINNUM, MVT::v2f64, 3}, + {ISD::FMINNUM, MVT::v4f32, 2}, + {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8" + {ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6" + {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5" + {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8" + {ISD::SMIN, MVT::v8i16, 2}, + {ISD::UMIN, MVT::v8i16, 2}, + }; + + static const CostTblEntry AVX1CostTblPairWise[] = { + {ISD::FMINNUM, MVT::v4f32, 1}, + {ISD::FMINNUM, MVT::v4f64, 1}, + {ISD::FMINNUM, MVT::v8f32, 2}, + {ISD::SMIN, MVT::v2i64, 3}, + {ISD::UMIN, MVT::v2i64, 3}, + {ISD::SMIN, MVT::v4i32, 1}, + {ISD::UMIN, MVT::v4i32, 1}, + {ISD::SMIN, MVT::v8i16, 1}, + {ISD::UMIN, MVT::v8i16, 1}, + {ISD::SMIN, MVT::v8i32, 3}, + {ISD::UMIN, MVT::v8i32, 3}, + }; + + static const CostTblEntry AVX2CostTblPairWise[] = { + {ISD::SMIN, MVT::v4i64, 2}, + {ISD::UMIN, MVT::v4i64, 2}, + {ISD::SMIN, MVT::v8i32, 1}, + {ISD::UMIN, MVT::v8i32, 1}, + {ISD::SMIN, MVT::v16i16, 1}, + {ISD::UMIN, MVT::v16i16, 1}, + {ISD::SMIN, MVT::v32i8, 2}, + {ISD::UMIN, MVT::v32i8, 2}, + }; + + static const CostTblEntry AVX512CostTblPairWise[] = { + {ISD::FMINNUM, MVT::v8f64, 1}, + {ISD::FMINNUM, MVT::v16f32, 2}, + {ISD::SMIN, MVT::v8i64, 2}, + {ISD::UMIN, MVT::v8i64, 2}, + {ISD::SMIN, MVT::v16i32, 1}, + {ISD::UMIN, MVT::v16i32, 1}, + }; + + static const CostTblEntry SSE42CostTblNoPairWise[] = { + {ISD::FMINNUM, MVT::v2f64, 3}, + {ISD::FMINNUM, MVT::v4f32, 3}, + {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8" + {ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6" + {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5" + {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8" + {ISD::SMIN, MVT::v8i16, 1}, // The data reported by the IACA is "1.5" + {ISD::UMIN, MVT::v8i16, 2}, // The data reported by the IACA is "1.8" + }; + + static const CostTblEntry AVX1CostTblNoPairWise[] = { + {ISD::FMINNUM, MVT::v4f32, 1}, + {ISD::FMINNUM, MVT::v4f64, 1}, + {ISD::FMINNUM, MVT::v8f32, 1}, + {ISD::SMIN, MVT::v2i64, 3}, + {ISD::UMIN, MVT::v2i64, 3}, + {ISD::SMIN, MVT::v4i32, 1}, + {ISD::UMIN, MVT::v4i32, 1}, + {ISD::SMIN, MVT::v8i16, 1}, + {ISD::UMIN, MVT::v8i16, 1}, + {ISD::SMIN, MVT::v8i32, 2}, + {ISD::UMIN, MVT::v8i32, 2}, + }; + + static const CostTblEntry AVX2CostTblNoPairWise[] = { + {ISD::SMIN, MVT::v4i64, 1}, + {ISD::UMIN, MVT::v4i64, 1}, + {ISD::SMIN, MVT::v8i32, 1}, + {ISD::UMIN, MVT::v8i32, 1}, + {ISD::SMIN, MVT::v16i16, 1}, + {ISD::UMIN, MVT::v16i16, 1}, + {ISD::SMIN, MVT::v32i8, 1}, + {ISD::UMIN, MVT::v32i8, 1}, + }; + + static const CostTblEntry AVX512CostTblNoPairWise[] = { + {ISD::FMINNUM, MVT::v8f64, 1}, + {ISD::FMINNUM, MVT::v16f32, 2}, + {ISD::SMIN, MVT::v8i64, 1}, + {ISD::UMIN, MVT::v8i64, 1}, + {ISD::SMIN, MVT::v16i32, 1}, + {ISD::UMIN, MVT::v16i32, 1}, + }; + + if (IsPairwise) { + if (ST->hasAVX512()) + if (const auto *Entry = CostTableLookup(AVX512CostTblPairWise, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasAVX2()) + if (const auto *Entry = CostTableLookup(AVX2CostTblPairWise, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasSSE42()) + if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy)) + return LT.first * Entry->Cost; + } else { + if (ST->hasAVX512()) + if (const auto *Entry = + CostTableLookup(AVX512CostTblNoPairWise, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasAVX2()) + if (const auto *Entry = CostTableLookup(AVX2CostTblNoPairWise, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasSSE42()) + if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy)) + return LT.first * Entry->Cost; + } + + return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned); +} + /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. |