diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-05-14 17:59:46 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-05-14 17:59:46 +0000 |
commit | 5bef9c627e44077e4ec5024be062faecc4eab3e5 (patch) | |
tree | 68afdca220cfdad89eaf113ceda81b5fc34bf516 | |
parent | aa8dffb69bccbe4e41469e0023b7508dd44945f8 (diff) | |
download | bcm5719-llvm-5bef9c627e44077e4ec5024be062faecc4eab3e5.tar.gz bcm5719-llvm-5bef9c627e44077e4ec5024be062faecc4eab3e5.zip |
[X86][XOP] XOP's general v16i8 shifts will be used instead of v8i16 shift + mask.
Tweak cost model to match what lowering actually does.
llvm-svn: 303013
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 9 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll | 6 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll | 6 |
3 files changed, 12 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index ba6462a0768..2952925ee41 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -273,9 +273,12 @@ int X86TTIImpl::getArithmeticInstrCost( if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41()) return LT.first * 15; - if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD, - LT.second)) - return LT.first * Entry->Cost; + // XOP has faster vXi8 shifts. + if ((ISD != ISD::SHL && ISD != ISD::SRL && ISD != ISD::SRA) || + !ST->hasXOP()) + if (const auto *Entry = + CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second)) + return LT.first * Entry->Cost; } static const CostTblEntry AVX2UniformCostTable[] = { diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll index d57e0a2824a..dd1003fe574 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -628,7 +628,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; AVX: Found an estimated cost of 4 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 4 for instruction: %shift -; XOP: Found an estimated cost of 4 for instruction: %shift +; XOP: Found an estimated cost of 2 for instruction: %shift %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <16 x i8> %shift } @@ -640,7 +640,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; AVX: Found an estimated cost of 10 for instruction: %shift ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512: Found an estimated cost of 4 for instruction: %shift -; XOPAVX: Found an estimated cost of 10 for instruction: %shift +; XOPAVX: Found an estimated cost of 6 for instruction: %shift ; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <32 x i8> %shift @@ -654,7 +654,7 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; AVX2: Found an estimated cost of 8 for instruction: %shift ; AVX512F: Found an estimated cost of 8 for instruction: %shift ; AVX512BW: Found an estimated cost of 4 for instruction: %shift -; XOPAVX: Found an estimated cost of 20 for instruction: %shift +; XOPAVX: Found an estimated cost of 12 for instruction: %shift ; XOPAVX2: Found an estimated cost of 8 for instruction: %shift %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <64 x i8> %shift diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll index e2f1a359b3e..99ffced8d7b 100644 --- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -638,7 +638,7 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) { ; AVX: Found an estimated cost of 2 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 2 for instruction: %shift -; XOP: Found an estimated cost of 2 for instruction: %shift +; XOP: Found an estimated cost of 1 for instruction: %shift %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <16 x i8> %shift } @@ -650,7 +650,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) { ; AVX: Found an estimated cost of 6 for instruction: %shift ; AVX2: Found an estimated cost of 2 for instruction: %shift ; AVX512: Found an estimated cost of 2 for instruction: %shift -; XOPAVX: Found an estimated cost of 6 for instruction: %shift +; XOPAVX: Found an estimated cost of 4 for instruction: %shift ; XOPAVX2: Found an estimated cost of 2 for instruction: %shift %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <32 x i8> %shift @@ -664,7 +664,7 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) { ; AVX2: Found an estimated cost of 4 for instruction: %shift ; AVX512F: Found an estimated cost of 4 for instruction: %shift ; AVX512BW: Found an estimated cost of 2 for instruction: %shift -; XOPAVX: Found an estimated cost of 12 for instruction: %shift +; XOPAVX: Found an estimated cost of 8 for instruction: %shift ; XOPAVX2: Found an estimated cost of 4 for instruction: %shift %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> ret <64 x i8> %shift |