summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2017-01-07 22:08:09 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2017-01-07 22:08:09 +0000
commit a470296367e48596646f0407044c8f58d40b5572 (patch)
tree 6974de2e3a3de41529b9bdd6ac285caf323e7096
parent 82e3e05fe29e68a54c9f216eed9aede7bb9d4510 (diff)
download bcm5719-llvm-a470296367e48596646f0407044c8f58d40b5572.tar.gz
bcm5719-llvm-a470296367e48596646f0407044c8f58d40b5572.zip
[CostModel][X86] Fix AVX2 v16i16 shift 'splat' costs.
llvm-svn: 291366
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp 17
-rw-r--r-- llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll 14
-rw-r--r-- llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll 14
3 files changed, 31 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 829b47b7aa2..30b20555257 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -291,6 +291,20 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
}
+ static const CostTblEntry AVX2UniformCostTable[] = {
+ // Uniform splats are cheaper for the following instructions.
+ { ISD::SRL, MVT::v16i16, 1 }, // psrlw.
+ { ISD::SRA, MVT::v16i16, 1 }, // psraw.
+ };
+
+ if (ST->hasAVX2() &&
+ ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
+ (Op2Info == TargetTransformInfo::OK_UniformValue))) {
+ if (const auto *Entry =
+ CostTableLookup(AVX2UniformCostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
static const CostTblEntry XOPShiftCostTable[] = {
// 128bit shifts take 1cy, but right shifts require negation beforehand.
{ ISD::SHL, MVT::v16i8, 1 },
@@ -325,8 +339,7 @@ int X86TTIImpl::getArithmeticInstrCost(
if (const auto *Entry = CostTableLookup(XOPShiftCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
- static const CostTblEntry
- SSE2UniformCostTable[] = {
+ static const CostTblEntry SSE2UniformCostTable[] = {
// Uniform splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
{ ISD::SHL, MVT::v32i8, 2 }, // psllw.
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index ab1eb730109..e75b5dc3ddc 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -578,9 +578,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
-; AVX2: Found an estimated cost of 2 for instruction: %shift
-; AVX512: Found an estimated cost of 2 for instruction: %shift
-; XOP: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
@@ -590,10 +591,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
-; AVX2: Found an estimated cost of 4 for instruction: %shift
-; AVX512F: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512F: Found an estimated cost of 2 for instruction: %shift
; AVX512BW: Found an estimated cost of 1 for instruction: %shift
-; XOP: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <32 x i16> %shift
}
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index fea727147ff..c1c674875c9 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -589,9 +589,10 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
; SSE2: Found an estimated cost of 2 for instruction: %shift
; SSE41: Found an estimated cost of 2 for instruction: %shift
; AVX: Found an estimated cost of 2 for instruction: %shift
-; AVX2: Found an estimated cost of 2 for instruction: %shift
-; AVX512: Found an estimated cost of 2 for instruction: %shift
-; XOP: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 1 for instruction: %shift
+; AVX512: Found an estimated cost of 1 for instruction: %shift
+; XOPAVX: Found an estimated cost of 4 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
%shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <16 x i16> %shift
}
@@ -601,10 +602,11 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
; SSE2: Found an estimated cost of 4 for instruction: %shift
; SSE41: Found an estimated cost of 4 for instruction: %shift
; AVX: Found an estimated cost of 4 for instruction: %shift
-; AVX2: Found an estimated cost of 4 for instruction: %shift
-; AVX512F: Found an estimated cost of 4 for instruction: %shift
+; AVX2: Found an estimated cost of 2 for instruction: %shift
+; AVX512F: Found an estimated cost of 2 for instruction: %shift
; AVX512BW: Found an estimated cost of 1 for instruction: %shift
-; XOP: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX: Found an estimated cost of 8 for instruction: %shift
+; XOPAVX2: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
ret <32 x i16> %shift
}
OpenPOWER on IntegriCloud