From c63f93a197a14d263c49ea65e2c3df7af4ea3efc Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 16 Aug 2017 13:50:20 +0000
Subject: [CostModel][X86][XOP] Improve costs for XOP shuffles

VPPERM/VPERMIL2PD/VPERMIL2PS all provide more effective 2-input shuffles than regular AVX instructions

llvm-svn: 311005
---
 llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')

diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index ed56e4512f6..2f44d610840 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -861,6 +861,28 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
     if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
       return LT.first * Entry->Cost;
 
+  static const CostTblEntry XOPShuffleTbl[] = {
+    { TTI::SK_PermuteSingleSrc, MVT::v4f64,  2 },  // vperm2f128 + vpermil2pd
+    { TTI::SK_PermuteSingleSrc, MVT::v8f32,  2 },  // vperm2f128 + vpermil2ps
+    { TTI::SK_PermuteSingleSrc, MVT::v4i64,  2 },  // vperm2f128 + vpermil2pd
+    { TTI::SK_PermuteSingleSrc, MVT::v8i32,  2 },  // vperm2f128 + vpermil2ps
+    { TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 },  // vextractf128 + 2*vpperm
+                                                   // + vinsertf128
+    { TTI::SK_PermuteSingleSrc, MVT::v32i8,  4 },  // vextractf128 + 2*vpperm
+                                                   // + vinsertf128
+
+    { TTI::SK_PermuteTwoSrc,    MVT::v16i16, 9 },  // 2*vextractf128 + 6*vpperm
+                                                   // + vinsertf128
+    { TTI::SK_PermuteTwoSrc,    MVT::v8i16,  1 },  // vpperm
+    { TTI::SK_PermuteTwoSrc,    MVT::v32i8,  9 },  // 2*vextractf128 + 6*vpperm
+                                                   // + vinsertf128
+    { TTI::SK_PermuteTwoSrc,    MVT::v16i8,  1 },  // vpperm
+  };
+
+  if (ST->hasXOP())
+    if (const auto *Entry = CostTableLookup(XOPShuffleTbl, Kind, LT.second))
+      return LT.first * Entry->Cost;
+
   static const CostTblEntry AVX1ShuffleTbl[] = {
     { TTI::SK_Broadcast, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
     { TTI::SK_Broadcast, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
-- 
cgit v1.2.3