[CostModel][X86] Updated vXi8 and vXi16 Reverse/Alternate shuffle costs

Actual codegen is much better than the extract+insert patterns that were assumed. llvm-svn: 290962
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-01-04 14:01:33 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-01-04 14:01:33 +0000
commit: bb895f3e9c49d14d637bdf14358e062160803655 (patch)
tree: 792b7f9c380c46ad195dff5b4cb46a0f0b56c2c0
parent: c08b90d08fcf4ca5df8093815863044463de9257 (diff)
download: bcm5719-llvm-bb895f3e9c49d14d637bdf14358e062160803655.tar.gz
bcm5719-llvm-bb895f3e9c49d14d637bdf14358e062160803655.zip
2 files changed, 31 insertions, 33 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 4711baba603..d7792e296a5 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -645,7 +645,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
       { TTI::SK_Reverse,   MVT::v16i16, 2 }, // vperm2i128 + pshufb
       { TTI::SK_Reverse,   MVT::v32i8,  2 }, // vperm2i128 + pshufb
 
-      { TTI::SK_Alternate, MVT::v16i16, 1 }  // vpblendw
+      { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
+      { TTI::SK_Alternate, MVT::v32i8,  1 }  // vpblendvb
     };
 
     if (ST->hasAVX2())
@@ -666,11 +667,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
       { TTI::SK_Alternate, MVT::v4f64,  1 }, // vblendpd
       { TTI::SK_Alternate, MVT::v8i32,  1 }, // vblendps
       { TTI::SK_Alternate, MVT::v8f32,  1 }, // vblendps
-
-      { TTI::SK_Alternate, MVT::v16i16, 5 }, // 2*vextractf128 + 2*vpblendw
-                                             // + vinsertf128
-      { TTI::SK_Alternate, MVT::v32i8,  9 }  // 2*vextractf128 + 4*vpshufb
-                                             // + 2*vpor + vinsertf128
+      { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
+      { TTI::SK_Alternate, MVT::v32i8,  3 }  // vpand + vpandn + vpor
     };
 
     if (ST->hasAVX())
@@ -683,7 +681,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
       { TTI::SK_Alternate, MVT::v4i32,  1 }, // pblendw
       { TTI::SK_Alternate, MVT::v4f32,  1 }, // blendps
       { TTI::SK_Alternate, MVT::v8i16,  1 }, // pblendw
-      { TTI::SK_Alternate, MVT::v16i8,  3 }  // 2*pshufb + por
+      { TTI::SK_Alternate, MVT::v16i8,  1 }  // pblendvb
     };
 
     if (ST->hasSSE41())
@@ -713,8 +711,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
       { TTI::SK_Alternate, MVT::v2i64,  1 }, // movsd
       { TTI::SK_Alternate, MVT::v2f64,  1 }, // movsd
       { TTI::SK_Alternate, MVT::v4i32,  2 }, // 2*shufps
-      { TTI::SK_Alternate, MVT::v8i16,  8 }, // 4*pextrw + 4*pinsrw.
-      { TTI::SK_Alternate, MVT::v16i8, 48 }, // 8*(pinsrw + pextrw + and +movb + movzb + or)
+      { TTI::SK_Alternate, MVT::v8i16,  3 }, // pand + pandn + por
+      { TTI::SK_Alternate, MVT::v16i8,  3 }  // pand + pandn + por
     };
 
     if (ST->hasSSE2())
@@ -722,8 +720,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
         return LT.first * Entry->Cost;
 
     static const CostTblEntry SSE1ShuffleTbl[] = {
-        { TTI::SK_Reverse,   MVT::v4f32,  1 }, // shufps
-        { TTI::SK_Alternate, MVT::v4f32,  2 }  // 2*shufps
+      { TTI::SK_Reverse,   MVT::v4f32,  1 }, // shufps
+      { TTI::SK_Alternate, MVT::v4f32,  2 }  // 2*shufps
     };
 
     if (ST->hasSSE1())
diff --git a/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll b/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll
index 2e162f0f000..9e706d62f8f 100644
--- a/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll
@@ -207,7 +207,7 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) {
   ret <8 x i16> %1
 }
 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16':
-; SSE2: Cost Model: {{.*}} 8 for instruction:   %1 = shufflevector
+; SSE2: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
 ; SSSE3: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
 ; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
 ; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
@@ -219,7 +219,7 @@ define <8 x i16> @test_v8i16_2(<8 x i16> %a, <8 x i16> %b) {
   ret <8 x i16> %1
 }
 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16_2':
-; SSE2: Cost Model: {{.*}} 8 for instruction:   %1 = shufflevector
+; SSE2: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
 ; SSSE3: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
 ; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
 ; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
@@ -280,11 +280,11 @@ define <16 x i8> @test_v16i8(<16 x i8> %a, <16 x i8> %b) {
   ret <16 x i8> %1
 }
 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8':
-; SSE2: Cost Model: {{.*}} 48 for instruction:   %1 = shufflevector
+; SSE2: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
 ; SSSE3: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
-; SSE41: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
-; AVX: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
-; AVX2: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
 
 
 define <16 x i8> @test_v16i8_2(<16 x i8> %a, <16 x i8> %b) {
@@ -292,11 +292,11 @@ define <16 x i8> @test_v16i8_2(<16 x i8> %a, <16 x i8> %b) {
   ret <16 x i8> %1
 }
 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8_2':
-; SSE2: Cost Model: {{.*}} 48 for instruction:   %1 = shufflevector
+; SSE2: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
 ; SSSE3: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
-; SSE41: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
-; AVX: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
-; AVX2: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
 
 
 define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
@@ -304,10 +304,10 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
   ret <16 x i16> %1
 }
 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16':
-; SSE2: Cost Model: {{.*}} 16 for instruction:   %1 = shufflevector
+; SSE2: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
 ; SSSE3: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
 ; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
-; AVX: Cost Model: {{.*}} 5 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
 
 
@@ -316,10 +316,10 @@ define <16 x i16> @test_v16i16_2(<16 x i16> %a, <16 x i16> %b) {
   ret <16 x i16> %1
 }
 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16_2':
-; SSE2: Cost Model: {{.*}} 16 for instruction:   %1 = shufflevector
+; SSE2: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
 ; SSSE3: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
 ; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
-; AVX: Cost Model: {{.*}} 5 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
 ; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
 
 define <32 x i8> @test_v32i8(<32 x i8> %a, <32 x i8> %b) {
@@ -327,11 +327,11 @@ define <32 x i8> @test_v32i8(<32 x i8> %a, <32 x i8> %b) {
   ret <32 x i8> %1
 }
 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8':
-; SSE2: Cost Model: {{.*}} 96 for instruction:   %1 = shufflevector
+; SSE2: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
 ; SSSE3: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
-; SSE41: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
-; AVX: Cost Model: {{.*}} 9 for instruction:   %1 = shufflevector
-; AVX2: Cost Model: {{.*}} 9 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
 
 
 define <32 x i8> @test_v32i8_2(<32 x i8> %a, <32 x i8> %b) {
@@ -339,9 +339,9 @@ define <32 x i8> @test_v32i8_2(<32 x i8> %a, <32 x i8> %b) {
   ret <32 x i8> %1
 }
 ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8_2':
-; SSE2: Cost Model: {{.*}} 96 for instruction:   %1 = shufflevector
+; SSE2: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
 ; SSSE3: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
-; SSE41: Cost Model: {{.*}} 6 for instruction:   %1 = shufflevector
-; AVX: Cost Model: {{.*}} 9 for instruction:   %1 = shufflevector
-; AVX2: Cost Model: {{.*}} 9 for instruction:   %1 = shufflevector
+; SSE41: Cost Model: {{.*}} 2 for instruction:   %1 = shufflevector
+; AVX: Cost Model: {{.*}} 3 for instruction:   %1 = shufflevector
+; AVX2: Cost Model: {{.*}} 1 for instruction:   %1 = shufflevector
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2017-01-04 14:01:33 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2017-01-04 14:01:33 +0000
commit	bb895f3e9c49d14d637bdf14358e062160803655 (patch)
tree	792b7f9c380c46ad195dff5b4cb46a0f0b56c2c0
parent	c08b90d08fcf4ca5df8093815863044463de9257 (diff)
download	bcm5719-llvm-bb895f3e9c49d14d637bdf14358e062160803655.tar.gz bcm5719-llvm-bb895f3e9c49d14d637bdf14358e062160803655.zip