diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-08-10 17:27:20 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-08-10 17:27:20 +0000 |
| commit | 702e5fa391545898facfe1fa10c9bfd530921ce6 (patch) | |
| tree | f4faa85608f1bb02de1fef714bcf73ad6ee12b92 /llvm/lib | |
| parent | c3e546fe42f64e7a19379a87746d05f339bfd801 (diff) | |
| download | bcm5719-llvm-702e5fa391545898facfe1fa10c9bfd530921ce6.tar.gz bcm5719-llvm-702e5fa391545898facfe1fa10c9bfd530921ce6.zip | |
[CostModel][X86] Improve single src shuffle costs
Add missing SK_PermuteSingleSrc costs for AVX2 targets and earlier, also added some of the simpler SK_PermuteTwoSrc costs to support splitting of SK_PermuteSingleSrc shuffles
llvm-svn: 310632
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 47 |
1 files changed, 36 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 82425189a0a..fcacef84a66 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -838,6 +838,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw { TTI::SK_Alternate, MVT::v32i8, 1 }, // vpblendvb + { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd + { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps { TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq { TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 + 2 * vpshufb @@ -872,7 +874,16 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor - { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor + { TTI::SK_Alternate, MVT::v32i8, 3 }, // vpand + vpandn + vpor + + { TTI::SK_PermuteSingleSrc, MVT::v4f64, 3 }, // 2*vperm2f128 + vshufpd + { TTI::SK_PermuteSingleSrc, MVT::v4i64, 3 }, // 2*vperm2f128 + vshufpd + { TTI::SK_PermuteSingleSrc, MVT::v8f32, 4 }, // 2*vperm2f128 + 2*vshufps + { TTI::SK_PermuteSingleSrc, MVT::v8i32, 4 }, // 2*vperm2f128 + 2*vshufps + { TTI::SK_PermuteSingleSrc, MVT::v16i16, 8 }, // vextractf128 + 4*pshufb + // + 2*por + vinsertf128 + { TTI::SK_PermuteSingleSrc, MVT::v32i8, 8 }, // vextractf128 + 4*pshufb + // + 2*por + vinsertf128 }; if (ST->hasAVX()) @@ -899,11 +910,14 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb - { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por - { TTI::SK_Alternate, MVT::v16i8, 3 }, // pshufb + pshufb + por + { TTI::SK_Alternate, MVT::v8i16, 3 }, // 2*pshufb + por + { TTI::SK_Alternate, MVT::v16i8, 3 }, // 2*pshufb + por { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb - { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 } // pshufb + { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb + + { TTI::SK_PermuteTwoSrc, MVT::v8i16, 3 }, // 2*pshufb + por + { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 }, // 2*pshufb + por }; if (ST->hasSSSE3()) @@ -914,13 +928,13 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Broadcast, MVT::v2f64, 1 }, // shufpd { TTI::SK_Broadcast, MVT::v2i64, 1 }, // pshufd { TTI::SK_Broadcast, MVT::v4i32, 1 }, // pshufd - { TTI::SK_Broadcast, MVT::v8i16, 2 }, // pshuflw + pshufd + { TTI::SK_Broadcast, MVT::v8i16, 2 }, // pshuflw + pshufd { TTI::SK_Broadcast, MVT::v16i8, 3 }, // unpck + pshuflw + pshufd { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd - { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd + { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw // + 2*pshufd + 2*unpck + packus @@ -930,8 +944,17 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por { TTI::SK_Alternate, MVT::v16i8, 3 }, // pand + pandn + por - { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd - { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 } // pshufd + { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // shufpd + { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd + { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // pshufd + { TTI::SK_PermuteSingleSrc, MVT::v8i16, 5 }, // 2*pshuflw + 2*pshufhw + // + pshufd/unpck + { TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw + // + 2*pshufd + 2*unpck + 2*packus + + { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, // shufpd + { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // shufpd + { TTI::SK_PermuteTwoSrc, MVT::v4i32, 2 }, // 2*{unpck,movsd,pshufd} }; if (ST->hasSSE2()) @@ -939,9 +962,11 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, return LT.first * Entry->Cost; static const CostTblEntry SSE1ShuffleTbl[] = { - { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps - { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps - { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps + { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps + { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps + { TTI::SK_Alternate, MVT::v4f32, 2 }, // 2*shufps + { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // shufps + { TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, // 2*shufps }; if (ST->hasSSE1()) |

