diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-04 14:01:33 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-01-04 14:01:33 +0000 |
commit | bb895f3e9c49d14d637bdf14358e062160803655 (patch) | |
tree | 792b7f9c380c46ad195dff5b4cb46a0f0b56c2c0 | |
parent | c08b90d08fcf4ca5df8093815863044463de9257 (diff) | |
download | bcm5719-llvm-bb895f3e9c49d14d637bdf14358e062160803655.tar.gz bcm5719-llvm-bb895f3e9c49d14d637bdf14358e062160803655.zip |
[CostModel][X86] Updated vXi8 and vXi16 Reverse/Alternate shuffle costs
Actual codegen is much better than the extract+insert patterns that were assumed.
llvm-svn: 290962
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 20 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll | 44 |
2 files changed, 31 insertions, 33 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 4711baba603..d7792e296a5 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -645,7 +645,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb - { TTI::SK_Alternate, MVT::v16i16, 1 } // vpblendw + { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw + { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb }; if (ST->hasAVX2()) @@ -666,11 +667,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps - - { TTI::SK_Alternate, MVT::v16i16, 5 }, // 2*vextractf128 + 2*vpblendw - // + vinsertf128 - { TTI::SK_Alternate, MVT::v32i8, 9 } // 2*vextractf128 + 4*vpshufb - // + 2*vpor + vinsertf128 + { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor + { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor }; if (ST->hasAVX()) @@ -683,7 +681,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw - { TTI::SK_Alternate, MVT::v16i8, 3 } // 2*pshufb + por + { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb }; if (ST->hasSSE41()) @@ -713,8 +711,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps - { TTI::SK_Alternate, MVT::v8i16, 8 }, // 4*pextrw + 4*pinsrw. 
- { TTI::SK_Alternate, MVT::v16i8, 48 }, // 8*(pinsrw + pextrw + and +movb + movzb + or) + { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por + { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por }; if (ST->hasSSE2()) @@ -722,8 +720,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, return LT.first * Entry->Cost; static const CostTblEntry SSE1ShuffleTbl[] = { - { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps - { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps + { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps + { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps }; if (ST->hasSSE1()) diff --git a/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll b/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll index 2e162f0f000..9e706d62f8f 100644 --- a/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll @@ -207,7 +207,7 @@ define <8 x i16> @test_v8i16(<8 x i16> %a, <8 x i16> %b) { ret <8 x i16> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16': -; SSE2: Cost Model: {{.*}} 8 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector ; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector @@ -219,7 +219,7 @@ define <8 x i16> @test_v8i16_2(<8 x i16> %a, <8 x i16> %b) { ret <8 x i16> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v8i16_2': -; SSE2: Cost Model: {{.*}} 8 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector ; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector @@ -280,11 +280,11 @@ define <16 x i8> @test_v16i8(<16 x i8> 
%a, <16 x i8> %b) { ret <16 x i8> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8': -; SSE2: Cost Model: {{.*}} 48 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector -; SSE41: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector -; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector -; AVX2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector define <16 x i8> @test_v16i8_2(<16 x i8> %a, <16 x i8> %b) { @@ -292,11 +292,11 @@ define <16 x i8> @test_v16i8_2(<16 x i8> %a, <16 x i8> %b) { ret <16 x i8> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i8_2': -; SSE2: Cost Model: {{.*}} 48 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector -; SSE41: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector -; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector -; AVX2: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { @@ -304,10 +304,10 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) { ret <16 x i16> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16': -; SSE2: Cost Model: {{.*}} 16 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector ; SSE41: 
Cost Model: {{.*}} 2 for instruction: %1 = shufflevector -; AVX: Cost Model: {{.*}} 5 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector @@ -316,10 +316,10 @@ define <16 x i16> @test_v16i16_2(<16 x i16> %a, <16 x i16> %b) { ret <16 x i16> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v16i16_2': -; SSE2: Cost Model: {{.*}} 16 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector ; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector -; AVX: Cost Model: {{.*}} 5 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector ; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector define <32 x i8> @test_v32i8(<32 x i8> %a, <32 x i8> %b) { @@ -327,11 +327,11 @@ define <32 x i8> @test_v32i8(<32 x i8> %a, <32 x i8> %b) { ret <32 x i8> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8': -; SSE2: Cost Model: {{.*}} 96 for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector -; SSE41: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector -; AVX: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector -; AVX2: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector define <32 x i8> @test_v32i8_2(<32 x i8> %a, <32 x i8> %b) { @@ -339,9 +339,9 @@ define <32 x i8> @test_v32i8_2(<32 x i8> %a, <32 x i8> %b) { ret <32 x i8> %1 } ; CHECK: Printing analysis 'Cost Model Analysis' for function 'test_v32i8_2': -; SSE2: Cost Model: {{.*}} 96 
for instruction: %1 = shufflevector +; SSE2: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector ; SSSE3: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector -; SSE41: Cost Model: {{.*}} 6 for instruction: %1 = shufflevector -; AVX: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector -; AVX2: Cost Model: {{.*}} 9 for instruction: %1 = shufflevector +; SSE41: Cost Model: {{.*}} 2 for instruction: %1 = shufflevector +; AVX: Cost Model: {{.*}} 3 for instruction: %1 = shufflevector +; AVX2: Cost Model: {{.*}} 1 for instruction: %1 = shufflevector |