4 files changed, 70 insertions, 46 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 32e0f986695..c0a12a82a32 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -248,19 +248,31 @@ int X86TTIImpl::getArithmeticInstrCost(
     // custom.
     // Constant splats are cheaper for the following instructions.
     { ISD::SHL,  MVT::v16i8,  1 }, // psllw.
+    { ISD::SHL,  MVT::v32i8,  2 }, // psllw.
     { ISD::SHL,  MVT::v8i16,  1 }, // psllw.
+    { ISD::SHL,  MVT::v16i16, 2 }, // psllw.
     { ISD::SHL,  MVT::v4i32,  1 }, // pslld
+    { ISD::SHL,  MVT::v8i32,  2 }, // pslld
     { ISD::SHL,  MVT::v2i64,  1 }, // psllq.
+    { ISD::SHL,  MVT::v4i64,  2 }, // psllq.
 
     { ISD::SRL,  MVT::v16i8,  1 }, // psrlw.
+    { ISD::SRL,  MVT::v32i8,  2 }, // psrlw.
     { ISD::SRL,  MVT::v8i16,  1 }, // psrlw.
+    { ISD::SRL,  MVT::v16i16, 2 }, // psrlw.
     { ISD::SRL,  MVT::v4i32,  1 }, // psrld.
+    { ISD::SRL,  MVT::v8i32,  2 }, // psrld.
     { ISD::SRL,  MVT::v2i64,  1 }, // psrlq.
+    { ISD::SRL,  MVT::v4i64,  2 }, // psrlq.
 
     { ISD::SRA,  MVT::v16i8,  4 }, // psrlw, pand, pxor, psubb.
+    { ISD::SRA,  MVT::v32i8,  8 }, // psrlw, pand, pxor, psubb.
     { ISD::SRA,  MVT::v8i16,  1 }, // psraw.
+    { ISD::SRA,  MVT::v16i16, 2 }, // psraw.
     { ISD::SRA,  MVT::v4i32,  1 }, // psrad.
+    { ISD::SRA,  MVT::v8i32,  2 }, // psrad.
     { ISD::SRA,  MVT::v2i64,  4 }, // 2 x psrad + shuffle.
+    { ISD::SRA,  MVT::v4i64,  8 }, // 2 x psrad + shuffle.
 
     { ISD::SDIV, MVT::v8i16,  6 }, // pmulhw sequence
     { ISD::UDIV, MVT::v8i16,  6 }, // pmulhuw sequence
@@ -282,15 +294,22 @@ int X86TTIImpl::getArithmeticInstrCost(
   if (ISD == ISD::SHL &&
       Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
     EVT VT = LT.second;
+    // Vector shift left by non uniform constant can be lowered
+    // into vector multiply (pmullw/pmulld).
     if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
         (VT == MVT::v4i32 && ST->hasSSE41()))
-      // Vector shift left by non uniform constant can be lowered
-      // into vector multiply (pmullw/pmulld).
       return LT.first;
+
+    // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
+    // sequence of extract + two vector multiply + insert.
+    if ((VT == MVT::v8i32 || VT == MVT::v16i16) &&
+       (ST->hasAVX() && !ST->hasAVX2()))
+      ISD = ISD::MUL;
+
+    // A vector shift left by non uniform constant is converted
+    // into a vector multiply; the new multiply is eventually
+    // lowered into a sequence of shuffles and 2 x pmuludq.
     if (VT == MVT::v4i32 && ST->hasSSE2())
-      // A vector shift left by non uniform constant is converted
-      // into a vector multiply; the new multiply is eventually
-      // lowered into a sequence of shuffles and 2 x pmuludq.
       ISD = ISD::MUL;
   }
 
@@ -304,20 +323,31 @@ int X86TTIImpl::getArithmeticInstrCost(
     // used for vectorization and we don't want to make vectorized code worse
     // than scalar code.
     { ISD::SHL,  MVT::v16i8,    26 }, // cmpgtb sequence.
+    { ISD::SHL,  MVT::v32i8,  2*26 }, // cmpgtb sequence.
     { ISD::SHL,  MVT::v8i16,    32 }, // cmpgtb sequence.
+    { ISD::SHL,  MVT::v16i16, 2*32 }, // cmpgtb sequence.
     { ISD::SHL,  MVT::v4i32,   2*5 }, // We optimized this using mul.
+    { ISD::SHL,  MVT::v8i32, 2*2*5 }, // We optimized this using mul.
     { ISD::SHL,  MVT::v2i64,     4 }, // splat+shuffle sequence.
-    { ISD::SHL,  MVT::v4i64,     8 }, // splat+shuffle sequence.
+    { ISD::SHL,  MVT::v4i64,   2*4 }, // splat+shuffle sequence.
 
     { ISD::SRL,  MVT::v16i8,    26 }, // cmpgtb sequence.
+    { ISD::SRL,  MVT::v32i8,  2*26 }, // cmpgtb sequence.
     { ISD::SRL,  MVT::v8i16,    32 }, // cmpgtb sequence.
+    { ISD::SRL,  MVT::v16i16, 2*32 }, // cmpgtb sequence.
     { ISD::SRL,  MVT::v4i32,    16 }, // Shift each lane + blend.
+    { ISD::SRL,  MVT::v8i32,  2*16 }, // Shift each lane + blend.
     { ISD::SRL,  MVT::v2i64,     4 }, // splat+shuffle sequence.
+    { ISD::SRL,  MVT::v4i64,   2*4 }, // splat+shuffle sequence.
 
     { ISD::SRA,  MVT::v16i8,    54 }, // unpacked cmpgtb sequence.
+    { ISD::SRA,  MVT::v32i8,  2*54 }, // unpacked cmpgtb sequence.
     { ISD::SRA,  MVT::v8i16,    32 }, // cmpgtb sequence.
+    { ISD::SRA,  MVT::v16i16, 2*32 }, // cmpgtb sequence.
     { ISD::SRA,  MVT::v4i32,    16 }, // Shift each lane + blend.
+    { ISD::SRA,  MVT::v8i32,  2*16 }, // Shift each lane + blend.
     { ISD::SRA,  MVT::v2i64,    12 }, // srl/xor/sub sequence.
+    { ISD::SRA,  MVT::v4i64,  2*12 }, // srl/xor/sub sequence.
 
     // It is not a good idea to vectorize division. We have to scalarize it and
     // in the process we will often end up having to spilling regular
@@ -363,12 +393,6 @@ int X86TTIImpl::getArithmeticInstrCost(
   if (ST->hasAVX() && !ST->hasAVX2()) {
     EVT VT = LT.second;
 
-    // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
-    // sequence of extract + two vector multiply + insert.
-    if (ISD == ISD::SHL && (VT == MVT::v8i32 || VT == MVT::v16i16) &&
-        Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)
-      ISD = ISD::MUL;
-
     int Idx = CostTableLookup(AVX1CostTable, ISD, VT);
     if (Idx != -1)
       return LT.first * AVX1CostTable[Idx].Cost;
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index cb1aa60f034..a0d07d7b6ec 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -26,7 +26,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
 ; SSE2: Found an estimated cost of 24 for instruction:   %shift
 ; SSE41: Found an estimated cost of 24 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <4 x i64> %a, %b
@@ -49,7 +49,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -72,7 +72,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <16 x i16> %a, %b
@@ -94,7 +94,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
 ; SSE2: Found an estimated cost of 108 for instruction:   %shift
 ; SSE41: Found an estimated cost of 108 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 108 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <32 x i8> %a, %b
@@ -121,7 +121,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
 ; SSE2: Found an estimated cost of 24 for instruction:   %shift
 ; SSE41: Found an estimated cost of 24 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
@@ -146,7 +146,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -171,7 +171,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -195,7 +195,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
 ; SSE2: Found an estimated cost of 108 for instruction:   %shift
 ; SSE41: Found an estimated cost of 108 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 108 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -222,7 +222,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
 ; SSE2: Found an estimated cost of 24 for instruction:   %shift
 ; SSE41: Found an estimated cost of 24 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 24 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 15, i64 31>
@@ -245,7 +245,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -268,7 +268,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
@@ -290,7 +290,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
 ; SSE2: Found an estimated cost of 108 for instruction:   %shift
 ; SSE41: Found an estimated cost of 108 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 108 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
@@ -316,7 +316,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 4 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -384,7 +384,7 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v32i8':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 24 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index 08b752b778e..a686b4368f2 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -27,7 +27,7 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v4i64':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -51,7 +51,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -74,7 +74,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <16 x i16> %a, %b
@@ -96,7 +96,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <32 x i8> %a, %b
@@ -124,7 +124,7 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v4i64':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -150,7 +150,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -175,7 +175,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -199,7 +199,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -227,7 +227,7 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v4i64':
 ; SSE2: Found an estimated cost of 8 for instruction:   %shift
 ; SSE41: Found an estimated cost of 8 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 8 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -251,7 +251,7 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v8i32':
 ; SSE2: Found an estimated cost of 32 for instruction:   %shift
 ; SSE41: Found an estimated cost of 32 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 32 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 4 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -274,7 +274,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
@@ -296,7 +296,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 4 for instruction:   %shift
   %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index 116828c9964..85ca5a5a7f3 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -52,7 +52,7 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v8i32':
 ; SSE2: Found an estimated cost of 20 for instruction:   %shift
 ; SSE41: Found an estimated cost of 20 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 20 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -75,7 +75,7 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = shl <16 x i16> %a, %b
@@ -97,7 +97,7 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'var_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = shl <32 x i8> %a, %b
@@ -151,7 +151,7 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v8i32':
 ; SSE2: Found an estimated cost of 20 for instruction:   %shift
 ; SSE41: Found an estimated cost of 20 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 20 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift
@@ -176,7 +176,7 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v16i16':
 ; SSE2: Found an estimated cost of 64 for instruction:   %shift
 ; SSE41: Found an estimated cost of 64 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 64 for instruction:   %shift
 ; AVX2: Found an estimated cost of 10 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
@@ -200,7 +200,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) {
 ; CHECK: 'Cost Model Analysis' for function 'splatvar_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
@@ -298,7 +298,7 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'constant_shift_v32i8':
 ; SSE2: Found an estimated cost of 52 for instruction:   %shift
 ; SSE41: Found an estimated cost of 52 for instruction:   %shift
-; AVX: Found an estimated cost of 2 for instruction:   %shift
+; AVX: Found an estimated cost of 52 for instruction:   %shift
 ; AVX2: Found an estimated cost of 11 for instruction:   %shift
 ; XOP: Found an estimated cost of 2 for instruction:   %shift
   %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
@@ -325,7 +325,7 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; CHECK: 'Cost Model Analysis' for function 'splatconstant_shift_v4i64':
 ; SSE2: Found an estimated cost of 2 for instruction:   %shift
 ; SSE41: Found an estimated cost of 2 for instruction:   %shift
-; AVX: Found an estimated cost of 8 for instruction:   %shift
+; AVX: Found an estimated cost of 2 for instruction:   %shift
 ; AVX2: Found an estimated cost of 1 for instruction:   %shift
 ; XOPAVX: Found an estimated cost of 2 for instruction:   %shift
 ; XOPAVX2: Found an estimated cost of 1 for instruction:   %shift