diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 65 |
1 file changed, 52 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 0ff08776c07..1181181a3c5 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -50,6 +50,8 @@ using namespace llvm; #define DEBUG_TYPE "x86tti" +extern cl::opt<bool> ExperimentalVectorWideningLegalization; + //===----------------------------------------------------------------------===// // // X86 cost model. @@ -918,7 +920,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, // FIXME: We can use permq for 64-bit or larger extracts from 256-bit // vectors. int OrigSubElts = SubTp->getVectorNumElements(); - if (NumSubElts > OrigSubElts && + if (ExperimentalVectorWideningLegalization && + NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 && (NumSubElts % OrigSubElts) == 0 && LT.second.getVectorElementType() == SubLT.second.getVectorElementType() && @@ -1330,6 +1333,12 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, // TODO: For AVX512DQ + AVX512VL, we also have cheap casts for 128-bit and // 256-bit wide vectors. 
+ // Used with widening legalization + static const TypeConversionCostTblEntry AVX512FConversionTblWide[] = { + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, + }; + static const TypeConversionCostTblEntry AVX512FConversionTbl[] = { { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 }, { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 }, @@ -1347,8 +1356,6 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, @@ -1401,19 +1408,28 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 }, }; + static const TypeConversionCostTblEntry AVX2ConversionTblWide[] = { + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 }, + }; + static const TypeConversionCostTblEntry AVX2ConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 }, - { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 
3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 }, - { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, @@ -1432,18 +1448,24 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 }, }; + static const TypeConversionCostTblEntry AVXConversionTblWide[] = { + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 }, + }; + static const TypeConversionCostTblEntry AVXConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 }, { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, @@ -1642,18 +1664,35 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, SimpleDstTy, SimpleSrcTy)) return Entry->Cost; + if (ST->hasAVX512() && ExperimentalVectorWideningLegalization) + if (const 
auto *Entry = ConvertCostTableLookup(AVX512FConversionTblWide, ISD, + SimpleDstTy, SimpleSrcTy)) + return Entry->Cost; + if (ST->hasAVX512()) if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) return Entry->Cost; } + if (ST->hasAVX2() && ExperimentalVectorWideningLegalization) { + if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTblWide, ISD, + SimpleDstTy, SimpleSrcTy)) + return Entry->Cost; + } + if (ST->hasAVX2()) { if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) return Entry->Cost; } + if (ST->hasAVX() && ExperimentalVectorWideningLegalization) { + if (const auto *Entry = ConvertCostTableLookup(AVXConversionTblWide, ISD, + SimpleDstTy, SimpleSrcTy)) + return Entry->Cost; + } + if (ST->hasAVX()) { if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) @@ -2520,7 +2559,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy, // in the table. // FIXME: Is there a better way to do this? EVT VT = TLI->getValueType(DL, ValTy); - if (VT.isSimple()) { + if (VT.isSimple() && ExperimentalVectorWideningLegalization) { MVT MTy = VT.getSimpleVT(); if (IsPairwise) { if (ST->hasAVX()) |