diff options
author | Nikita Popov <nikita.ppv@gmail.com> | 2018-12-18 18:28:22 +0000 |
---|---|---|
committer | Nikita Popov <nikita.ppv@gmail.com> | 2018-12-18 18:28:22 +0000 |
commit | f6058ff140ae5f79b11507e11b73501ee3df0596 (patch) | |
tree | d0e41f79d8d51d0a3a63d21a56afd75a83094e7a /llvm/lib/Target | |
parent | 20a6db5a84ce8947d188becadd04f80ec353e805 (diff) | |
download | bcm5719-llvm-f6058ff140ae5f79b11507e11b73501ee3df0596.tar.gz bcm5719-llvm-f6058ff140ae5f79b11507e11b73501ee3df0596.zip |
[X86] Use SADDSAT/SSUBSAT instead of ADDS/SUBS
Migrate the X86 backend from X86ISD opcodes ADDS and SUBS to generic
ISD opcodes SADDSAT and SSUBSAT. This also improves codegen for
@llvm.sadd.sat() and @llvm.ssub.sat() intrinsics.
This is a followup to D55787 and part of PR40056.
Differential Revision: https://reviews.llvm.org/D55833
llvm-svn: 349520
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 40 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 24 |
6 files changed, 48 insertions, 34 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7d131661649..c316e96f34d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -830,14 +830,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal); - setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); + setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal); setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal); + setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal); + setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); + setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal); setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal); + setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal); // Use widening instead of promotion. for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16 }) { - setOperationAction(ISD::UADDSAT, VT, Custom); - setOperationAction(ISD::USUBSAT, VT, Custom); + setOperationAction(ISD::UADDSAT, VT, Custom); + setOperationAction(ISD::SADDSAT, VT, Custom); + setOperationAction(ISD::USUBSAT, VT, Custom); + setOperationAction(ISD::SSUBSAT, VT, Custom); } setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); @@ -1212,9 +1218,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMIN, MVT::v4i64, Custom); setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); - setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); + setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); + setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? 
Legal : Custom); setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); @@ -1334,7 +1344,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::TRUNCATE, VT, Custom); setOperationAction(ISD::UADDSAT, VT, Custom); + setOperationAction(ISD::SADDSAT, VT, Custom); setOperationAction(ISD::USUBSAT, VT, Custom); + setOperationAction(ISD::SSUBSAT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); @@ -1596,7 +1608,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, VT, Custom); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::UADDSAT, VT, Custom); + setOperationAction(ISD::SADDSAT, VT, Custom); setOperationAction(ISD::USUBSAT, VT, Custom); + setOperationAction(ISD::SSUBSAT, VT, Custom); setOperationAction(ISD::TRUNCATE, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); @@ -1678,7 +1692,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMIN, VT, Legal); setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::UADDSAT, VT, Legal); + setOperationAction(ISD::SADDSAT, VT, Legal); setOperationAction(ISD::USUBSAT, VT, Legal); + setOperationAction(ISD::SSUBSAT, VT, Legal); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. 
@@ -23388,15 +23404,17 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) { return split256IntArith(Op, DAG); } -static SDValue LowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); if (VT.getScalarType() == MVT::i1) { SDLoc dl(Op); switch (Op.getOpcode()) { default: llvm_unreachable("Expected saturated arithmetic opcode"); case ISD::UADDSAT: + case ISD::SADDSAT: return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1)); case ISD::USUBSAT: + case ISD::SSUBSAT: return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), DAG.getNOT(dl, Op.getOperand(1), VT)); } @@ -26194,7 +26212,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADD: case ISD::SUB: return LowerADD_SUB(Op, DAG); case ISD::UADDSAT: - case ISD::USUBSAT: return LowerUADDSAT_USUBSAT(Op, DAG); + case ISD::SADDSAT: + case ISD::USUBSAT: + case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG); case ISD::SMAX: case ISD::SMIN: case ISD::UMAX: @@ -26277,11 +26297,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return; } case ISD::UADDSAT: + case ISD::SADDSAT: case ISD::USUBSAT: + case ISD::SSUBSAT: case X86ISD::VPMADDWD: case X86ISD::AVG: { - // Legalize types for ISD::UADDSAT/USUBSAT and X86ISD::AVG/VPMADDWD - // by widening. + // Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and + // X86ISD::AVG/VPMADDWD by widening. 
assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); EVT VT = N->getValueType(0); @@ -27228,8 +27250,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND"; case X86ISD::SCALEF: return "X86ISD::SCALEF"; case X86ISD::SCALEFS: return "X86ISD::SCALEFS"; - case X86ISD::ADDS: return "X86ISD::ADDS"; - case X86ISD::SUBS: return "X86ISD::SUBS"; case X86ISD::AVG: return "X86ISD::AVG"; case X86ISD::MULHRS: return "X86ISD::MULHRS"; case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index f95031d87b9..17fd315a2b4 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -226,10 +226,6 @@ namespace llvm { SCALEF, SCALEFS, - // Integer add/sub with signed saturation. - ADDS, - SUBS, - // Unsigned Integer average. AVG, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 93fdb9a130d..7e60b9caf05 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4830,9 +4830,9 @@ defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, SchedWriteVecALU, 1>; defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub, SchedWriteVecALU, 0>; -defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds, +defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat, SchedWriteVecALU, HasBWI, 1>; -defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, +defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat, SchedWriteVecALU, HasBWI, 0>; defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat, SchedWriteVecALU, HasBWI, 1>; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index b72e6f5d59b..0b98abaa7a2 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td 
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -227,8 +227,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; -def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>; -def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>; def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>; def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 61b9cf78d83..94cd5a611f2 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3623,9 +3623,9 @@ defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, SchedWriteVecALU, 1, NoVLX>; defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, SchedWriteVecALU, 1, NoVLX>; -defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8, +defm PADDSB : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; -defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16, +defm PADDSW : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; @@ -3645,9 +3645,9 @@ defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, SchedWriteVecALU, 0, NoVLX>; defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, SchedWriteVecALU, 0, NoVLX>; -defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8, +defm PSUBSB : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8, SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; -defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16, +defm PSUBSW : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16, SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8, SchedWriteVecALU, 0, 
NoVLX_Or_NoBWI>; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index f1ed8ca48cd..fdcd31c3f9d 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -319,8 +319,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0), X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0), - X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0), + X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0), + X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0), X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0), @@ -361,8 +361,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0), X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0), X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0), - X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0), + X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0), + X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0), X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0), @@ -920,8 +920,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0), 
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, X86ISD::ADDS, 0), - X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, X86ISD::ADDS, 0), + X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, ISD::SADDSAT, 0), + X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, ISD::SADDSAT, 0), X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0), X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0), @@ -1004,8 +1004,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0), X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, X86ISD::SUBS, 0), - X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, X86ISD::SUBS, 0), + X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0), + X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), @@ -1168,8 +1168,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0), - X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0), - X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0), + X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0), + X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0), 
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0), X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0), X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0), @@ -1191,8 +1191,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0), - X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0), - X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0), + X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0), + X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0), X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ), X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE), X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT), |