summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-10-12 13:21:50 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-10-12 13:21:50 +0000
commit: 1b59a16c0b8bd89445b97eee0fde2a152001dbaf (patch)
tree: 4a8db0f7b553bc9e38ed955915a44b71eb0df974 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent: ba229557dd208a39086c5e2bd8db619dca7d9b45 (diff)
download: bcm5719-llvm-1b59a16c0b8bd89445b97eee0fde2a152001dbaf.tar.gz
download: bcm5719-llvm-1b59a16c0b8bd89445b97eee0fde2a152001dbaf.zip
[CostModel][X86] Improve sum reduction costs.
I can't see any notable differences in costs between SSE2 and SSE4.2 architectures for FADD/ADD reductions, so I've lowered the required target to just SSE2. I've also added vXi8 sum reduction costs, in line with the PSADBW codegen and the discussions on PR42674.

llvm-svn: 374655
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 45
1 file changed, 23 insertions, 22 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 2f419b78f83..696350dbfa7 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2488,7 +2488,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
// We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
// and make it as the cost.
- static const CostTblEntry SSE42CostTblPairWise[] = {
+ static const CostTblEntry SSE2CostTblPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
@@ -2497,23 +2497,23 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
{ ISD::ADD, MVT::v2i16, 3 }, // FIXME: chosen to be less than v4i16
{ ISD::ADD, MVT::v4i16, 4 }, // FIXME: chosen to be less than v8i16
{ ISD::ADD, MVT::v8i16, 5 },
+ { ISD::ADD, MVT::v2i8, 2 },
+ { ISD::ADD, MVT::v4i8, 2 },
+ { ISD::ADD, MVT::v8i8, 2 },
+ { ISD::ADD, MVT::v16i8, 3 },
};
static const CostTblEntry AVX1CostTblPairWise[] = {
- { ISD::FADD, MVT::v4f32, 4 },
{ ISD::FADD, MVT::v4f64, 5 },
{ ISD::FADD, MVT::v8f32, 7 },
{ ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
- { ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32
- { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
{ ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8".
- { ISD::ADD, MVT::v2i16, 3 }, // FIXME: chosen to be less than v4i16
- { ISD::ADD, MVT::v4i16, 4 }, // FIXME: chosen to be less than v8i16
- { ISD::ADD, MVT::v8i16, 5 },
{ ISD::ADD, MVT::v8i32, 5 },
+ { ISD::ADD, MVT::v16i16, 6 },
+ { ISD::ADD, MVT::v32i8, 4 },
};
- static const CostTblEntry SSE42CostTblNoPairWise[] = {
+ static const CostTblEntry SSE2CostTblNoPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
@@ -2522,20 +2522,21 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
{ ISD::ADD, MVT::v2i16, 2 }, // The data reported by the IACA tool is "4.3".
{ ISD::ADD, MVT::v4i16, 3 }, // The data reported by the IACA tool is "4.3".
{ ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
+ { ISD::ADD, MVT::v2i8, 2 },
+ { ISD::ADD, MVT::v4i8, 2 },
+ { ISD::ADD, MVT::v8i8, 2 },
+ { ISD::ADD, MVT::v16i8, 3 },
};
static const CostTblEntry AVX1CostTblNoPairWise[] = {
- { ISD::FADD, MVT::v4f32, 3 },
{ ISD::FADD, MVT::v4f64, 3 },
+ { ISD::FADD, MVT::v4f32, 3 },
{ ISD::FADD, MVT::v8f32, 4 },
{ ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
- { ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32
- { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "2.8".
{ ISD::ADD, MVT::v4i64, 3 },
- { ISD::ADD, MVT::v2i16, 2 }, // The data reported by the IACA tool is "4.3".
- { ISD::ADD, MVT::v4i16, 3 }, // The data reported by the IACA tool is "4.3".
- { ISD::ADD, MVT::v8i16, 4 },
{ ISD::ADD, MVT::v8i32, 5 },
+ { ISD::ADD, MVT::v16i16, 5 },
+ { ISD::ADD, MVT::v32i8, 4 },
};
int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -2552,16 +2553,16 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
return Entry->Cost;
- if (ST->hasSSE42())
- if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
return Entry->Cost;
} else {
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
return Entry->Cost;
- if (ST->hasSSE42())
- if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
return Entry->Cost;
}
}
@@ -2575,16 +2576,16 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy))
return LT.first * Entry->Cost;
- if (ST->hasSSE42())
- if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy))
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblPairWise, ISD, MTy))
return LT.first * Entry->Cost;
} else {
if (ST->hasAVX())
if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
return LT.first * Entry->Cost;
- if (ST->hasSSE42())
- if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy))
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
return LT.first * Entry->Cost;
}
OpenPOWER on IntegriCloud