diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-05-11 17:12:52 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-05-11 17:12:52 +0000 |
commit | 6b10fde69b801c6c632097f08aa802c34328263f (patch) | |
tree | 5afbd5435ec09b71c40ded1402418fc73376497f /llvm/lib/Target/X86/X86TargetTransformInfo.cpp | |
parent | a10f016006cad21460eeed5664c370391908b170 (diff) | |
download | bcm5719-llvm-6b10fde69b801c6c632097f08aa802c34328263f.tar.gz bcm5719-llvm-6b10fde69b801c6c632097f08aa802c34328263f.zip |
[CostModel][X86] Add min/max reduction costs for all SSE targets
The original costs stopped at SSE42; I've added conservative estimates for everything down to SSE1/SSE2 and moved some of the SSE42 costs to SSE41 (really, only the addition of PCMPGT makes any difference).
I've also added missing vXi8 costs (we use PHMINPOSUW for i8/i16 for scarily quick results) and 256-bit vector costs for AVX1.
llvm-svn: 360528
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 96 |
1 files changed, 90 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 7f0e431bd08..7501834ea4c 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2552,15 +2552,37 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy, // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput // and make it as the cost. - static const CostTblEntry SSE42CostTblPairWise[] = { + static const CostTblEntry SSE1CostTblPairWise[] = { + {ISD::FMINNUM, MVT::v4f32, 4}, + }; + + static const CostTblEntry SSE2CostTblPairWise[] = { {ISD::FMINNUM, MVT::v2f64, 3}, + {ISD::SMIN, MVT::v2i64, 6}, + {ISD::UMIN, MVT::v2i64, 8}, + {ISD::SMIN, MVT::v4i32, 6}, + {ISD::UMIN, MVT::v4i32, 8}, + {ISD::SMIN, MVT::v8i16, 4}, + {ISD::UMIN, MVT::v8i16, 6}, + {ISD::SMIN, MVT::v16i8, 8}, + {ISD::UMIN, MVT::v16i8, 6}, + }; + + static const CostTblEntry SSE41CostTblPairWise[] = { {ISD::FMINNUM, MVT::v4f32, 2}, - {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8" - {ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6" + {ISD::SMIN, MVT::v2i64, 9}, + {ISD::UMIN, MVT::v2i64,10}, {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5" {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8" {ISD::SMIN, MVT::v8i16, 2}, {ISD::UMIN, MVT::v8i16, 2}, + {ISD::SMIN, MVT::v16i8, 3}, + {ISD::UMIN, MVT::v16i8, 3}, + }; + + static const CostTblEntry SSE42CostTblPairWise[] = { + {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8" + {ISD::UMIN, MVT::v2i64, 8}, // The data reported by the IACA is "8.6" }; static const CostTblEntry AVX1CostTblPairWise[] = { @@ -2573,8 +2595,16 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy, {ISD::UMIN, MVT::v4i32, 1}, {ISD::SMIN, MVT::v8i16, 1}, {ISD::UMIN, MVT::v8i16, 1}, + {ISD::SMIN, MVT::v16i8, 2}, + {ISD::UMIN, MVT::v16i8, 2}, + {ISD::SMIN, MVT::v4i64, 
7}, + {ISD::UMIN, MVT::v4i64, 7}, {ISD::SMIN, MVT::v8i32, 3}, {ISD::UMIN, MVT::v8i32, 3}, + {ISD::SMIN, MVT::v16i16, 3}, + {ISD::UMIN, MVT::v16i16, 3}, + {ISD::SMIN, MVT::v32i8, 3}, + {ISD::UMIN, MVT::v32i8, 3}, }; static const CostTblEntry AVX2CostTblPairWise[] = { @@ -2597,15 +2627,37 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy, {ISD::UMIN, MVT::v16i32, 1}, }; - static const CostTblEntry SSE42CostTblNoPairWise[] = { + static const CostTblEntry SSE1CostTblNoPairWise[] = { + {ISD::FMINNUM, MVT::v4f32, 4}, + }; + + static const CostTblEntry SSE2CostTblNoPairWise[] = { {ISD::FMINNUM, MVT::v2f64, 3}, + {ISD::SMIN, MVT::v2i64, 6}, + {ISD::UMIN, MVT::v2i64, 8}, + {ISD::SMIN, MVT::v4i32, 6}, + {ISD::UMIN, MVT::v4i32, 8}, + {ISD::SMIN, MVT::v8i16, 4}, + {ISD::UMIN, MVT::v8i16, 6}, + {ISD::SMIN, MVT::v16i8, 8}, + {ISD::UMIN, MVT::v16i8, 6}, + }; + + static const CostTblEntry SSE41CostTblNoPairWise[] = { {ISD::FMINNUM, MVT::v4f32, 3}, - {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8" - {ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6" + {ISD::SMIN, MVT::v2i64, 9}, + {ISD::UMIN, MVT::v2i64,11}, {ISD::SMIN, MVT::v4i32, 1}, // The data reported by the IACA is "1.5" {ISD::UMIN, MVT::v4i32, 2}, // The data reported by the IACA is "1.8" {ISD::SMIN, MVT::v8i16, 1}, // The data reported by the IACA is "1.5" {ISD::UMIN, MVT::v8i16, 2}, // The data reported by the IACA is "1.8" + {ISD::SMIN, MVT::v16i8, 3}, + {ISD::UMIN, MVT::v16i8, 3}, + }; + + static const CostTblEntry SSE42CostTblNoPairWise[] = { + {ISD::SMIN, MVT::v2i64, 7}, // The data reported by the IACA is "6.8" + {ISD::UMIN, MVT::v2i64, 9}, // The data reported by the IACA is "8.6" }; static const CostTblEntry AVX1CostTblNoPairWise[] = { @@ -2618,8 +2670,16 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy, {ISD::UMIN, MVT::v4i32, 1}, {ISD::SMIN, MVT::v8i16, 1}, {ISD::UMIN, MVT::v8i16, 1}, + {ISD::SMIN, MVT::v16i8, 2}, + {ISD::UMIN, 
MVT::v16i8, 2}, + {ISD::SMIN, MVT::v4i64, 7}, + {ISD::UMIN, MVT::v4i64, 7}, {ISD::SMIN, MVT::v8i32, 2}, {ISD::UMIN, MVT::v8i32, 2}, + {ISD::SMIN, MVT::v16i16, 2}, + {ISD::UMIN, MVT::v16i16, 2}, + {ISD::SMIN, MVT::v32i8, 2}, + {ISD::UMIN, MVT::v32i8, 2}, }; static const CostTblEntry AVX2CostTblNoPairWise[] = { @@ -2658,6 +2718,18 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy, if (ST->hasSSE42()) if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy)) return LT.first * Entry->Cost; + + if (ST->hasSSE41()) + if (const auto *Entry = CostTableLookup(SSE41CostTblPairWise, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasSSE1()) + if (const auto *Entry = CostTableLookup(SSE1CostTblPairWise, ISD, MTy)) + return LT.first * Entry->Cost; } else { if (ST->hasAVX512()) if (const auto *Entry = @@ -2675,6 +2747,18 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy, if (ST->hasSSE42()) if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy)) return LT.first * Entry->Cost; + + if (ST->hasSSE41()) + if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasSSE2()) + if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) + return LT.first * Entry->Cost; + + if (ST->hasSSE1()) + if (const auto *Entry = CostTableLookup(SSE1CostTblNoPairWise, ISD, MTy)) + return LT.first * Entry->Cost; } return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned); |