summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp65
1 files changed, 52 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 0ff08776c07..1181181a3c5 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -50,6 +50,8 @@ using namespace llvm;
#define DEBUG_TYPE "x86tti"
+extern cl::opt<bool> ExperimentalVectorWideningLegalization;
+
//===----------------------------------------------------------------------===//
//
// X86 cost model.
@@ -918,7 +920,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
// FIXME: We can use permq for 64-bit or larger extracts from 256-bit
// vectors.
int OrigSubElts = SubTp->getVectorNumElements();
- if (NumSubElts > OrigSubElts &&
+ if (ExperimentalVectorWideningLegalization &&
+ NumSubElts > OrigSubElts &&
(Index % OrigSubElts) == 0 && (NumSubElts % OrigSubElts) == 0 &&
LT.second.getVectorElementType() ==
SubLT.second.getVectorElementType() &&
@@ -1330,6 +1333,12 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
// TODO: For AVX512DQ + AVX512VL, we also have cheap casts for 128-bit and
// 256-bit wide vectors.
+ // Used with widening legalization
+ static const TypeConversionCostTblEntry AVX512FConversionTblWide[] = {
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
+ };
+
static const TypeConversionCostTblEntry AVX512FConversionTbl[] = {
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
@@ -1347,8 +1356,6 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
- { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
- { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
@@ -1401,19 +1408,28 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 },
};
+ static const TypeConversionCostTblEntry AVX2ConversionTblWide[] = {
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
+ };
+
static const TypeConversionCostTblEntry AVX2ConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
- { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
- { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
- { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
- { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
@@ -1432,18 +1448,24 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
};
+ static const TypeConversionCostTblEntry AVXConversionTblWide[] = {
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 },
+ };
+
static const TypeConversionCostTblEntry AVXConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
- { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
- { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
@@ -1642,18 +1664,35 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
+ if (ST->hasAVX512() && ExperimentalVectorWideningLegalization)
+ if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTblWide, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return Entry->Cost;
+
if (ST->hasAVX512())
if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
+ if (ST->hasAVX2() && ExperimentalVectorWideningLegalization) {
+ if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTblWide, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return Entry->Cost;
+ }
+
if (ST->hasAVX2()) {
if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
SimpleDstTy, SimpleSrcTy))
return Entry->Cost;
}
+ if (ST->hasAVX() && ExperimentalVectorWideningLegalization) {
+ if (const auto *Entry = ConvertCostTableLookup(AVXConversionTblWide, ISD,
+ SimpleDstTy, SimpleSrcTy))
+ return Entry->Cost;
+ }
+
if (ST->hasAVX()) {
if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
SimpleDstTy, SimpleSrcTy))
@@ -2520,7 +2559,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
// in the table.
// FIXME: Is there a better way to do this?
EVT VT = TLI->getValueType(DL, ValTy);
- if (VT.isSimple()) {
+ if (VT.isSimple() && ExperimentalVectorWideningLegalization) {
MVT MTy = VT.getSimpleVT();
if (IsPairwise) {
if (ST->hasAVX())
OpenPOWER on IntegriCloud