summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2017-08-10 17:27:20 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2017-08-10 17:27:20 +0000
commit702e5fa391545898facfe1fa10c9bfd530921ce6 (patch)
treef4faa85608f1bb02de1fef714bcf73ad6ee12b92 /llvm/lib
parentc3e546fe42f64e7a19379a87746d05f339bfd801 (diff)
downloadbcm5719-llvm-702e5fa391545898facfe1fa10c9bfd530921ce6.tar.gz
bcm5719-llvm-702e5fa391545898facfe1fa10c9bfd530921ce6.zip
[CostModel][X86] Improve single src shuffle costs
Add missing SK_PermuteSingleSrc costs for AVX2 targets and earlier, also added some of the simpler SK_PermuteTwoSrc costs to support splitting of SK_PermuteSingleSrc shuffles llvm-svn: 310632
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp47
1 files changed, 36 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 82425189a0a..fcacef84a66 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -838,6 +838,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
{ TTI::SK_Alternate, MVT::v32i8, 1 }, // vpblendvb
+ { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
+ { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
{ TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
{ TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
{ TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 + 2 * vpshufb
@@ -872,7 +874,16 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
{ TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
{ TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
- { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor
+ { TTI::SK_Alternate, MVT::v32i8, 3 }, // vpand + vpandn + vpor
+
+ { TTI::SK_PermuteSingleSrc, MVT::v4f64, 3 }, // 2*vperm2f128 + vshufpd
+ { TTI::SK_PermuteSingleSrc, MVT::v4i64, 3 }, // 2*vperm2f128 + vshufpd
+ { TTI::SK_PermuteSingleSrc, MVT::v8f32, 4 }, // 2*vperm2f128 + 2*vshufps
+ { TTI::SK_PermuteSingleSrc, MVT::v8i32, 4 }, // 2*vperm2f128 + 2*vshufps
+ { TTI::SK_PermuteSingleSrc, MVT::v16i16, 8 }, // vextractf128 + 4*pshufb
+ // + 2*por + vinsertf128
+ { TTI::SK_PermuteSingleSrc, MVT::v32i8, 8 }, // vextractf128 + 4*pshufb
+ // + 2*por + vinsertf128
};
if (ST->hasAVX())
@@ -899,11 +910,14 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
{ TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
- { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
- { TTI::SK_Alternate, MVT::v16i8, 3 }, // pshufb + pshufb + por
+ { TTI::SK_Alternate, MVT::v8i16, 3 }, // 2*pshufb + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 }, // 2*pshufb + por
{ TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb
- { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 } // pshufb
+ { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
+
+ { TTI::SK_PermuteTwoSrc, MVT::v8i16, 3 }, // 2*pshufb + por
+ { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3 }, // 2*pshufb + por
};
if (ST->hasSSSE3())
@@ -914,13 +928,13 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Broadcast, MVT::v2f64, 1 }, // shufpd
{ TTI::SK_Broadcast, MVT::v2i64, 1 }, // pshufd
{ TTI::SK_Broadcast, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_Broadcast, MVT::v8i16, 2 }, // pshuflw + pshufd
+ { TTI::SK_Broadcast, MVT::v8i16, 2 }, // pshuflw + pshufd
{ TTI::SK_Broadcast, MVT::v16i8, 3 }, // unpck + pshuflw + pshufd
{ TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
{ TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
{ TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
- { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
+ { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
{ TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + packus
@@ -930,8 +944,17 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
{ TTI::SK_Alternate, MVT::v16i8, 3 }, // pand + pandn + por
- { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd
- { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 } // pshufd
+ { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // shufpd
+ { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd
+ { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1 }, // pshufd
+ { TTI::SK_PermuteSingleSrc, MVT::v8i16, 5 }, // 2*pshuflw + 2*pshufhw
+ // + pshufd/unpck
+ { TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + 2*packus
+
+ { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, // shufpd
+ { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, // shufpd
+ { TTI::SK_PermuteTwoSrc, MVT::v4i32, 2 }, // 2*{unpck,movsd,pshufd}
};
if (ST->hasSSE2())
@@ -939,9 +962,11 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry SSE1ShuffleTbl[] = {
- { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps
- { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
- { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
+ { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_Alternate, MVT::v4f32, 2 }, // 2*shufps
+ { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, // 2*shufps
};
if (ST->hasSSE1())
OpenPOWER on IntegriCloud