diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-07 22:08:09 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-07 22:08:09 +0000 |
commit | a470296367e48596646f0407044c8f58d40b5572 (patch) | |
tree | 6974de2e3a3de41529b9bdd6ac285caf323e7096 /llvm/lib | |
parent | 82e3e05fe29e68a54c9f216eed9aede7bb9d4510 (diff) | |
download | bcm5719-llvm-a470296367e48596646f0407044c8f58d40b5572.tar.gz bcm5719-llvm-a470296367e48596646f0407044c8f58d40b5572.zip |
[CostModel][X86] Fix AVX2 v16i16 shift 'splat' costs.
llvm-svn: 291366
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 17 |
1 file changed, 15 insertions, 2 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 829b47b7aa2..30b20555257 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -291,6 +291,20 @@ int X86TTIImpl::getArithmeticInstrCost(
       return LT.first * Entry->Cost;
   }
 
+  static const CostTblEntry AVX2UniformCostTable[] = {
+    // Uniform splats are cheaper for the following instructions.
+    { ISD::SRL, MVT::v16i16, 1 }, // psrlw.
+    { ISD::SRA, MVT::v16i16, 1 }, // psraw.
+  };
+
+  if (ST->hasAVX2() &&
+      ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
+       (Op2Info == TargetTransformInfo::OK_UniformValue))) {
+    if (const auto *Entry =
+            CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
+      return LT.first * Entry->Cost;
+  }
+
   static const CostTblEntry XOPShiftCostTable[] = {
     // 128bit shifts take 1cy, but right shifts require negation beforehand.
     { ISD::SHL, MVT::v16i8, 1 },
@@ -325,8 +339,7 @@ int X86TTIImpl::getArithmeticInstrCost(
     if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
       return LT.first * Entry->Cost;
 
-  static const CostTblEntry
-  SSE2UniformCostTable[] = {
+  static const CostTblEntry SSE2UniformCostTable[] = {
     // Uniform splats are cheaper for the following instructions.
     { ISD::SHL, MVT::v16i8, 1 }, // psllw.
     { ISD::SHL, MVT::v32i8, 2 }, // psllw.
```