summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
diff options
context:
space:
mode:
authorArnold Schwaighofer <aschwaighofer@apple.com>2013-04-08 18:05:48 +0000
committerArnold Schwaighofer <aschwaighofer@apple.com>2013-04-08 18:05:48 +0000
commitf47d2d7f6b8b57a227b029710dd9f78f1de4dc11 (patch)
tree56552b78b4bb0a4921398a3859e21d5b8ae77d4b /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent32efd25b939432d1c5b365fbb0438c3644a546f1 (diff)
downloadbcm5719-llvm-f47d2d7f6b8b57a227b029710dd9f78f1de4dc11.tar.gz
bcm5719-llvm-f47d2d7f6b8b57a227b029710dd9f78f1de4dc11.zip
X86 cost model: Model cost for uitofp and sitofp on SSE2
The costs are overfitted so that I can still use the legalization factor. For example the following kernel has about half the throughput vectorized than unvectorized when compiled with SSE2. Before this patch we would vectorize it. unsigned short A[1024]; double B[1024]; void f() { int i; for (i = 0; i < 1024; ++i) { B[i] = (double) A[i]; } } radar://13599001 llvm-svn: 179033
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp37
1 files changed, 34 insertions, 3 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index a98c6991192..488c2a42b2d 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -334,12 +334,43 @@ unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(Src);
+ std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(Dst);
+
+ static const TypeConversionCostTblEntry<MVT> SSE2ConvTbl[] = {
+ // These are somewhat magic numbers justified by looking at the output of
+ // Intel's IACA, running some kernels and making sure when we take
+ // legalization into account the throughput will be overestimated.
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 },
+ // There are faster sequences for float conversions.
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 },
+ };
+
+ if (ST->hasSSE2() && !ST->hasAVX()) {
+ int Idx = ConvertCostTableLookup<MVT>(SSE2ConvTbl,
+ array_lengthof(SSE2ConvTbl),
+ ISD, LTDest.second, LTSrc.second);
+ if (Idx != -1)
+ return LTSrc.first * SSE2ConvTbl[Idx].Cost;
+ }
+
EVT SrcTy = TLI->getValueType(Src);
EVT DstTy = TLI->getValueType(Dst);
- if (!SrcTy.isSimple() || !DstTy.isSimple())
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
-
static const TypeConversionCostTblEntry<MVT> AVXConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
OpenPOWER on IntegriCloud