diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-07 22:08:09 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-07 22:08:09 +0000 |
commit | a470296367e48596646f0407044c8f58d40b5572 (patch) | |
tree | 6974de2e3a3de41529b9bdd6ac285caf323e7096 | |
parent | 82e3e05fe29e68a54c9f216eed9aede7bb9d4510 (diff) | |
download | bcm5719-llvm-a470296367e48596646f0407044c8f58d40b5572.tar.gz bcm5719-llvm-a470296367e48596646f0407044c8f58d40b5572.zip |
[CostModel][X86] Fix AVX2 v16i16 shift 'splat' costs.
llvm-svn: 291366
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 17 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll | 14 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll | 14 |
3 files changed, 31 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 829b47b7aa2..30b20555257 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -291,6 +291,20 @@ int X86TTIImpl::getArithmeticInstrCost( return LT.first * Entry->Cost; } + static const CostTblEntry AVX2UniformCostTable[] = { + // Uniform splats are cheaper for the following instructions. + { ISD::SRL, MVT::v16i16, 1 }, // psrlw. + { ISD::SRA, MVT::v16i16, 1 }, // psraw. + }; + + if (ST->hasAVX2() && + ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || + (Op2Info == TargetTransformInfo::OK_UniformValue))) { + if (const auto *Entry = + CostTableLookup(AVX2UniformCostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + static const CostTblEntry XOPShiftCostTable[] = { // 128bit shifts take 1cy, but right shifts require negation beforehand. { ISD::SHL, MVT::v16i8, 1 }, @@ -325,8 +339,7 @@ int X86TTIImpl::getArithmeticInstrCost( if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry - SSE2UniformCostTable[] = { + static const CostTblEntry SSE2UniformCostTable[] = { // Uniform splats are cheaper for the following instructions. { ISD::SHL, MVT::v16i8, 1 }, // psllw. { ISD::SHL, MVT::v32i8, 2 }, // psllw. diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index ab1eb730109..e75b5dc3ddc 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -578,9 +578,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift -; AVX2: Found an estimated cost of 2 for instruction: %shift -; AVX512: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> ret <16 x i16> %shift } @@ -590,10 +591,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift -; AVX2: Found an estimated cost of 4 for instruction: %shift -; AVX512F: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512F: Found an estimated cost of 2 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> ret <32 x i16> %shift } diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll index fea727147ff..c1c674875c9 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -589,9 +589,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) { ; SSE2: Found an estimated cost of 2 for instruction: %shift ; SSE41: Found an estimated cost of 2 for instruction: %shift ; AVX: Found an estimated cost of 2 for instruction: %shift -; AVX2: Found an estimated cost of 2 for instruction: %shift -; AVX512: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 1 for instruction: %shift +; AVX512: Found an estimated cost of 1 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift +; XOPAVX2: Found an estimated cost of 1 for instruction: %shift %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> ret <16 x i16> %shift } @@ -601,10 +602,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) { ; SSE2: Found an estimated cost of 4 for instruction: %shift ; SSE41: Found an estimated cost of 4 for instruction: %shift ; AVX: Found an estimated cost of 4 for instruction: %shift -; AVX2: Found an estimated cost of 4 for instruction: %shift -; AVX512F: Found an estimated cost of 4 for instruction: %shift +; AVX2: Found an estimated cost of 2 for instruction: %shift +; AVX512F: Found an estimated cost of 2 for instruction: %shift ; AVX512BW: Found an estimated cost of 1 for instruction: %shift -; XOP: Found an estimated cost of 8 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift +; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3> ret <32 x i16> %shift } |