summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp 26
-rw-r--r-- llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll 84
-rw-r--r-- llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll 84
-rw-r--r-- llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll 50
4 files changed, 135 insertions, 109 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 7f6dc2b2164..a2cc73addf4 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -400,6 +400,32 @@ int X86TTIImpl::getArithmeticInstrCost(
ISD = ISD::MUL;
}
+ static const CostTblEntry SSE41CostTable[] = { // Vector SHL/SRL/SRA costs, consulted below only when SSE4.1 is available.
+ { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence.
+ { ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence; 2x the v16i8 cost.
+ { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence.
+ { ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence; 2x the v8i16 cost.
+
+ { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence.
+ { ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence; 2x the v16i8 cost.
+ { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence.
+ { ISD::SRL, MVT::v16i16, 2*14 }, // pblendvb sequence; 2x the v8i16 cost.
+ { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend.
+ { ISD::SRL, MVT::v8i32, 2*11 }, // Shift each lane + blend; 2x the v4i32 cost.
+
+ { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence.
+ { ISD::SRA, MVT::v32i8, 2*24 }, // pblendvb sequence; 2x the v16i8 cost.
+ { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence.
+ { ISD::SRA, MVT::v16i16, 2*14 }, // pblendvb sequence; 2x the v8i16 cost.
+ { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend.
+ { ISD::SRA, MVT::v8i32, 2*12 }, // Shift each lane + blend; 2x the v4i32 cost.
+ };
+
+ if (ST->hasSSE41()) { // Consult the table only when the subtarget reports SSE4.1.
+ if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second)) // Taken only when the lookup yields a non-null entry.
+ return LT.first * Entry->Cost; // Scale the matched entry's cost by LT.first.
+ }
+
static const CostTblEntry SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index a0d07d7b6ec..e53e40b57e1 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -36,8 +36,8 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -48,8 +48,8 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -60,9 +60,9 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <8 x i16> %a, %b
ret <8 x i16> %shift
@@ -71,8 +71,8 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <16 x i16> %a, %b
@@ -82,9 +82,9 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
-; SSE41: Found an estimated cost of 54 for instruction: %shift
-; AVX: Found an estimated cost of 54 for instruction: %shift
-; AVX2: Found an estimated cost of 54 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, %b
ret <16 x i8> %shift
@@ -93,8 +93,8 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction: %shift
-; SSE41: Found an estimated cost of 108 for instruction: %shift
-; AVX: Found an estimated cost of 108 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, %b
@@ -132,8 +132,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -145,8 +145,8 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -158,9 +158,9 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = ashr <8 x i16> %a, %splat
@@ -170,8 +170,8 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -182,9 +182,9 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
-; SSE41: Found an estimated cost of 54 for instruction: %shift
-; AVX: Found an estimated cost of 54 for instruction: %shift
-; AVX2: Found an estimated cost of 54 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = ashr <16 x i8> %a, %splat
@@ -194,8 +194,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction: %shift
-; SSE41: Found an estimated cost of 108 for instruction: %shift
-; AVX: Found an estimated cost of 108 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -232,8 +232,8 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -244,8 +244,8 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -256,9 +256,9 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
@@ -267,8 +267,8 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
@@ -278,9 +278,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 54 for instruction: %shift
-; SSE41: Found an estimated cost of 54 for instruction: %shift
-; AVX: Found an estimated cost of 54 for instruction: %shift
-; AVX2: Found an estimated cost of 54 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
+; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
@@ -289,8 +289,8 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 108 for instruction: %shift
-; SSE41: Found an estimated cost of 108 for instruction: %shift
-; AVX: Found an estimated cost of 108 for instruction: %shift
+; SSE41: Found an estimated cost of 48 for instruction: %shift
+; AVX: Found an estimated cost of 48 for instruction: %shift
; AVX2: Found an estimated cost of 24 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index a686b4368f2..6d028268ea5 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -38,8 +38,8 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -50,8 +50,8 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -62,9 +62,9 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <8 x i16> %a, %b
ret <8 x i16> %shift
@@ -73,8 +73,8 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <16 x i16> %a, %b
@@ -84,9 +84,9 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
+; AVX2: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <16 x i8> %a, %b
ret <16 x i8> %shift
@@ -95,8 +95,8 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <32 x i8> %a, %b
@@ -136,8 +136,8 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -149,8 +149,8 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -162,9 +162,9 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = lshr <8 x i16> %a, %splat
@@ -174,8 +174,8 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -186,9 +186,9 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
+; AVX2: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = lshr <16 x i8> %a, %splat
@@ -198,8 +198,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -238,8 +238,8 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i32':
; SSE2: Found an estimated cost of 16 for instruction: %shift
-; SSE41: Found an estimated cost of 16 for instruction: %shift
-; AVX: Found an estimated cost of 16 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 2 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -250,8 +250,8 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) {
define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 1 for instruction: %shift
; XOPAVX: Found an estimated cost of 4 for instruction: %shift
; XOPAVX2: Found an estimated cost of 1 for instruction: %shift
@@ -262,9 +262,9 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
ret <8 x i16> %shift
@@ -273,8 +273,8 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) {
define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
@@ -284,9 +284,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 12 for instruction: %shift
+; AVX: Found an estimated cost of 12 for instruction: %shift
+; AVX2: Found an estimated cost of 12 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
@@ -295,8 +295,8 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 24 for instruction: %shift
+; AVX: Found an estimated cost of 24 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 4 for instruction: %shift
%shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index 85ca5a5a7f3..60ba3adea42 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -63,9 +63,9 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <8 x i16> %a, %b
ret <8 x i16> %shift
@@ -74,8 +74,8 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <16 x i16> %a, %b
@@ -85,9 +85,9 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, %b
ret <16 x i8> %shift
@@ -96,8 +96,8 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, %b
@@ -163,9 +163,9 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i16':
; SSE2: Found an estimated cost of 32 for instruction: %shift
-; SSE41: Found an estimated cost of 32 for instruction: %shift
-; AVX: Found an estimated cost of 32 for instruction: %shift
-; AVX2: Found an estimated cost of 32 for instruction: %shift
+; SSE41: Found an estimated cost of 14 for instruction: %shift
+; AVX: Found an estimated cost of 14 for instruction: %shift
+; AVX2: Found an estimated cost of 14 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
%shift = shl <8 x i16> %a, %splat
@@ -175,8 +175,8 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) {
define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
; SSE2: Found an estimated cost of 64 for instruction: %shift
-; SSE41: Found an estimated cost of 64 for instruction: %shift
-; AVX: Found an estimated cost of 64 for instruction: %shift
+; SSE41: Found an estimated cost of 28 for instruction: %shift
+; AVX: Found an estimated cost of 28 for instruction: %shift
; AVX2: Found an estimated cost of 10 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -187,9 +187,9 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
%shift = shl <16 x i8> %a, %splat
@@ -199,8 +199,8 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) {
define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -286,9 +286,9 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i8':
; SSE2: Found an estimated cost of 26 for instruction: %shift
-; SSE41: Found an estimated cost of 26 for instruction: %shift
-; AVX: Found an estimated cost of 26 for instruction: %shift
-; AVX2: Found an estimated cost of 26 for instruction: %shift
+; SSE41: Found an estimated cost of 11 for instruction: %shift
+; AVX: Found an estimated cost of 11 for instruction: %shift
+; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 1 for instruction: %shift
%shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
ret <16 x i8> %shift
@@ -297,8 +297,8 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) {
define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
; SSE2: Found an estimated cost of 52 for instruction: %shift
-; SSE41: Found an estimated cost of 52 for instruction: %shift
-; AVX: Found an estimated cost of 52 for instruction: %shift
+; SSE41: Found an estimated cost of 22 for instruction: %shift
+; AVX: Found an estimated cost of 22 for instruction: %shift
; AVX2: Found an estimated cost of 11 for instruction: %shift
; XOP: Found an estimated cost of 2 for instruction: %shift
%shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
OpenPOWER on IntegriCloud