diff options
| author | Nikita Popov <nikita.ppv@gmail.com> | 2019-01-15 18:43:41 +0000 |
|---|---|---|
| committer | Nikita Popov <nikita.ppv@gmail.com> | 2019-01-15 18:43:41 +0000 |
| commit | d3b86b79fa99d7c9253cc2911aeda0655be9f7c0 (patch) | |
| tree | 49b9b61bf30aab82b74abf7ccb06d91558d786bd /llvm/lib/Target | |
| parent | 6f9d49cdde65898c3fbd74a7a98e7438216b0ba5 (diff) | |
| download | bcm5719-llvm-d3b86b79fa99d7c9253cc2911aeda0655be9f7c0.tar.gz bcm5719-llvm-d3b86b79fa99d7c9253cc2911aeda0655be9f7c0.zip | |
Reapply "[CodeGen][X86] Expand USUBSAT to UMAX+SUB, also for vectors"
Related to https://bugs.llvm.org/show_bug.cgi?id=40123.
Rather than scalarizing, expand a vector USUBSAT into UMAX+SUB,
which produces much better code for X86.
Reapplying with updated SLPVectorizer tests.
Differential Revision: https://reviews.llvm.org/D56636
llvm-svn: 351219
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 7 |
1 file changed, 7 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index a3592565c0f..36929a4f543 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1780,6 +1780,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::CTPOP, MVT::v16i32, 24 }, { ISD::CTTZ, MVT::v8i64, 20 }, { ISD::CTTZ, MVT::v16i32, 28 }, + { ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd + { ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq + { ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq + { ISD::USUBSAT, MVT::v8i64, 2 }, // pmaxuq + psubq }; static const CostTblEntry XOPCostTbl[] = { { ISD::BITREVERSE, MVT::v4i64, 4 }, @@ -1823,6 +1827,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::UADDSAT, MVT::v32i8, 1 }, { ISD::USUBSAT, MVT::v16i16, 1 }, { ISD::USUBSAT, MVT::v32i8, 1 }, + { ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd { ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/ { ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/ { ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/ @@ -1858,6 +1863,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert { ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert { ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert + { ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert { ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/ { ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/ { ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/ @@ -1878,6 +1884,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd }; static const CostTblEntry SSE42CostTbl[] = { + { ISD::USUBSAT, MVT::v4i32, 2 }, // pmaxud + psubd { ISD::FSQRT, 
MVT::f32, 18 }, // Nehalem from http://www.agner.org/ { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/ }; |

