summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorNikita Popov <nikita.ppv@gmail.com>2019-01-15 18:43:41 +0000
committerNikita Popov <nikita.ppv@gmail.com>2019-01-15 18:43:41 +0000
commitd3b86b79fa99d7c9253cc2911aeda0655be9f7c0 (patch)
tree49b9b61bf30aab82b74abf7ccb06d91558d786bd /llvm/lib/Target
parent6f9d49cdde65898c3fbd74a7a98e7438216b0ba5 (diff)
downloadbcm5719-llvm-d3b86b79fa99d7c9253cc2911aeda0655be9f7c0.tar.gz
bcm5719-llvm-d3b86b79fa99d7c9253cc2911aeda0655be9f7c0.zip
Reapply "[CodeGen][X86] Expand USUBSAT to UMAX+SUB, also for vectors"
Related to https://bugs.llvm.org/show_bug.cgi?id=40123. Rather than scalarizing, expand a vector USUBSAT into UMAX+SUB, which produces much better code for X86. Reapplying with updated SLPVectorizer tests. Differential Revision: https://reviews.llvm.org/D56636 llvm-svn: 351219
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp7
1 files changed, 7 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index a3592565c0f..36929a4f543 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1780,6 +1780,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::CTPOP, MVT::v16i32, 24 },
{ ISD::CTTZ, MVT::v8i64, 20 },
{ ISD::CTTZ, MVT::v16i32, 28 },
+ { ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd
+ { ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq
+ { ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq
+ { ISD::USUBSAT, MVT::v8i64, 2 }, // pmaxuq + psubq
};
static const CostTblEntry XOPCostTbl[] = {
{ ISD::BITREVERSE, MVT::v4i64, 4 },
@@ -1823,6 +1827,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::UADDSAT, MVT::v32i8, 1 },
{ ISD::USUBSAT, MVT::v16i16, 1 },
{ ISD::USUBSAT, MVT::v32i8, 1 },
+ { ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd
{ ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
@@ -1858,6 +1863,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert
+ { ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert
{ ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
@@ -1878,6 +1884,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd
};
static const CostTblEntry SSE42CostTbl[] = {
+ { ISD::USUBSAT, MVT::v4i32, 2 }, // pmaxud + psubd
{ ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
};
OpenPOWER on IntegriCloud