diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-07 17:03:51 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-07 17:03:51 +0000 |
commit | 100eae1ee06ee97daac0cecf5189e9cfc2b6f11b (patch) | |
tree | 827da153268b53d2252265796579fea2d483c8f1 /llvm/lib/Target/X86 | |
parent | 32f8d560ddd7ed27098ab5c77fa22aa51806cf6a (diff) | |
download | bcm5719-llvm-100eae1ee06ee97daac0cecf5189e9cfc2b6f11b.tar.gz bcm5719-llvm-100eae1ee06ee97daac0cecf5189e9cfc2b6f11b.zip |
[CostModel][X86] Reordered AVX1 arithmetic cost LUT into descending target order. NFCI.
llvm-svn: 291352
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 54 |
1 files changed, 27 insertions, 27 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 9167c09b43f..7c3739719bc 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -440,6 +440,33 @@ int X86TTIImpl::getArithmeticInstrCost( ISD = ISD::MUL; } + static const CostTblEntry AVX1CostTable[] = { + // We don't have to scalarize unsupported ops. We can issue two half-sized + // operations and we only need to extract the upper YMM half. + // Two ops + 1 extract + 1 insert = 4. + { ISD::MUL, MVT::v16i16, 4 }, + { ISD::MUL, MVT::v8i32, 4 }, + { ISD::SUB, MVT::v32i8, 4 }, + { ISD::ADD, MVT::v32i8, 4 }, + { ISD::SUB, MVT::v16i16, 4 }, + { ISD::ADD, MVT::v16i16, 4 }, + { ISD::SUB, MVT::v8i32, 4 }, + { ISD::ADD, MVT::v8i32, 4 }, + { ISD::SUB, MVT::v4i64, 4 }, + { ISD::ADD, MVT::v4i64, 4 }, + + // A v4i64 multiply is custom lowered as two split v2i64 vectors that then + // are lowered as a series of long multiplies(3), shifts(3) and adds(2) + // Because we believe v4i64 to be a legal type, we must also include the + // extract+insert in the cost table. Therefore, the cost here is 18 + // instead of 8. + { ISD::MUL, MVT::v4i64, 18 }, + }; + + if (ST->hasAVX() && !ST->hasAVX2()) + if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + static const CostTblEntry SSE42CostTable[] = { { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/ @@ -529,33 +556,6 @@ int X86TTIImpl::getArithmeticInstrCost( if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry AVX1CostTable[] = { - // We don't have to scalarize unsupported ops. We can issue two half-sized - // operations and we only need to extract the upper YMM half. - // Two ops + 1 extract + 1 insert = 4. - { ISD::MUL, MVT::v16i16, 4 }, - { ISD::MUL, MVT::v8i32, 4 }, - { ISD::SUB, MVT::v32i8, 4 }, - { ISD::ADD, MVT::v32i8, 4 }, - { ISD::SUB, MVT::v16i16, 4 }, - { ISD::ADD, MVT::v16i16, 4 }, - { ISD::SUB, MVT::v8i32, 4 }, - { ISD::ADD, MVT::v8i32, 4 }, - { ISD::SUB, MVT::v4i64, 4 }, - { ISD::ADD, MVT::v4i64, 4 }, - // A v4i64 multiply is custom lowered as two split v2i64 vectors that then - // are lowered as a series of long multiplies(3), shifts(3) and adds(2) - // Because we believe v4i64 to be a legal type, we must also include the - // extract+insert in the cost table. Therefore, the cost here is 18 - // instead of 8. - { ISD::MUL, MVT::v4i64, 18 }, - }; - - // Look for AVX1 lowering tricks. - if (ST->hasAVX() && !ST->hasAVX2()) - if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second)) - return LT.first * Entry->Cost; - static const CostTblEntry SSE1CostTable[] = { { ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/ { ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/ |