summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
diff options
context:
space:
mode:
authorCong Hou <congh@google.com>2015-12-11 00:31:39 +0000
committerCong Hou <congh@google.com>2015-12-11 00:31:39 +0000
commit59898d8c6806990a39c62321a13927dbac8a2029 (patch)
tree10f5412124c3f67ae416be7a29147fde6c6b22da /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent533ff009ff210a2068119f0fea32c5bfab76f238 (diff)
downloadbcm5719-llvm-59898d8c6806990a39c62321a13927dbac8a2029.tar.gz
bcm5719-llvm-59898d8c6806990a39c62321a13927dbac8a2029.zip
[X86][SSE] Update the cost table for integer-integer conversions on SSE2/SSE4.1.
Previously in the conversion cost table there are no entries for integer-integer conversions on SSE2. This will result in imprecise costs for certain vectorized operations. This patch adds those entries for SSE2 and SSE4.1. The cost numbers are counted from the result of running llc on the new test case in this patch. Differential revision: http://reviews.llvm.org/D15132 llvm-svn: 255315
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp81
1 files changed, 79 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index cf7a826ea85..d33d5758412 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -528,6 +528,9 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ // FIXME: Need a better design of the cost table to handle non-simple types of
+ // potential massive combinations (elem_num x src_type x dst_type).
+
static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
{ ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
@@ -705,7 +708,38 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 },
};
- static const TypeConversionCostTblEntry SSE2ConvTbl[] = {
+ static const TypeConversionCostTblEntry SSE41ConversionTbl[] = {
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 2 },
+
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 30 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 },
+ };
+
+ static const TypeConversionCostTblEntry SSE2ConversionTbl[] = {
// These are somewhat magic numbers justified by looking at the output of
// Intel's IACA, running some kernels and making sure when we take
// legalization into account the throughput will be overestimated.
@@ -726,13 +760,42 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
+
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 14 },
+ { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 7 },
+ { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 31 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 },
+ { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 },
};
std::pair<int, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
std::pair<int, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
if (ST->hasSSE2() && !ST->hasAVX()) {
- if (const auto *Entry = ConvertCostTableLookup(SSE2ConvTbl, ISD,
+ if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
LTDest.second, LTSrc.second))
return LTSrc.first * Entry->Cost;
}
@@ -770,6 +833,20 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
return Entry->Cost;
}
+ if (ST->hasSSE41()) {
+ if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+ }
+
+ if (ST->hasSSE2()) {
+ if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD,
+ DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT()))
+ return Entry->Cost;
+ }
+
return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
OpenPOWER on IntegriCloud