summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorNikita Popov <nikita.ppv@gmail.com>2018-12-18 18:28:22 +0000
committerNikita Popov <nikita.ppv@gmail.com>2018-12-18 18:28:22 +0000
commitf6058ff140ae5f79b11507e11b73501ee3df0596 (patch)
treed0e41f79d8d51d0a3a63d21a56afd75a83094e7a /llvm/lib/Target
parent20a6db5a84ce8947d188becadd04f80ec353e805 (diff)
downloadbcm5719-llvm-f6058ff140ae5f79b11507e11b73501ee3df0596.tar.gz
bcm5719-llvm-f6058ff140ae5f79b11507e11b73501ee3df0596.zip
[X86] Use SADDSAT/SSUBSAT instead of ADDS/SUBS
Migrate the X86 backend from X86ISD opcodes ADDS and SUBS to generic ISD opcodes SADDSAT and SSUBSAT. This also improves scodegen for @llvm.sadd.sat() and @llvm.ssub.sat() intrinsics. This is a followup to D55787 and part of PR40056. Differential Revision: https://reviews.llvm.org/D55833 llvm-svn: 349520
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp40
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h4
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td4
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td2
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td8
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h24
6 files changed, 48 insertions, 34 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7d131661649..c316e96f34d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -830,14 +830,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
- setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
// Use widening instead of promotion.
for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
MVT::v4i16, MVT::v2i16 }) {
- setOperationAction(ISD::UADDSAT, VT, Custom);
- setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
}
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
@@ -1212,9 +1218,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
- setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
@@ -1334,7 +1344,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -1596,7 +1608,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::SADDSAT, VT, Custom);
setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1678,7 +1692,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Legal);
+ setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
+ setOperationAction(ISD::SSUBSAT, VT, Legal);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -23388,15 +23404,17 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
return split256IntArith(Op, DAG);
}
-static SDValue LowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (VT.getScalarType() == MVT::i1) {
SDLoc dl(Op);
switch (Op.getOpcode()) {
default: llvm_unreachable("Expected saturated arithmetic opcode");
case ISD::UADDSAT:
+ case ISD::SADDSAT:
return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));
case ISD::USUBSAT:
+ case ISD::SSUBSAT:
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
DAG.getNOT(dl, Op.getOperand(1), VT));
}
@@ -26194,7 +26212,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADD:
case ISD::SUB: return LowerADD_SUB(Op, DAG);
case ISD::UADDSAT:
- case ISD::USUBSAT: return LowerUADDSAT_USUBSAT(Op, DAG);
+ case ISD::SADDSAT:
+ case ISD::USUBSAT:
+ case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG);
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
@@ -26277,11 +26297,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::UADDSAT:
+ case ISD::SADDSAT:
case ISD::USUBSAT:
+ case ISD::SSUBSAT:
case X86ISD::VPMADDWD:
case X86ISD::AVG: {
- // Legalize types for ISD::UADDSAT/USUBSAT and X86ISD::AVG/VPMADDWD
- // by widening.
+ // Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and
+ // X86ISD::AVG/VPMADDWD by widening.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT VT = N->getValueType(0);
@@ -27228,8 +27250,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND";
case X86ISD::SCALEF: return "X86ISD::SCALEF";
case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
- case X86ISD::ADDS: return "X86ISD::ADDS";
- case X86ISD::SUBS: return "X86ISD::SUBS";
case X86ISD::AVG: return "X86ISD::AVG";
case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index f95031d87b9..17fd315a2b4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -226,10 +226,6 @@ namespace llvm {
SCALEF,
SCALEFS,
- // Integer add/sub with signed saturation.
- ADDS,
- SUBS,
-
// Unsigned Integer average.
AVG,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 93fdb9a130d..7e60b9caf05 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4830,9 +4830,9 @@ defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
SchedWriteVecALU, 0>;
-defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
+defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
SchedWriteVecALU, HasBWI, 1>;
-defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
+defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
SchedWriteVecALU, HasBWI, 1>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index b72e6f5d59b..0b98abaa7a2 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -227,8 +227,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
-def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
-def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 61b9cf78d83..94cd5a611f2 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3623,9 +3623,9 @@ defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
SchedWriteVecALU, 1, NoVLX>;
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
SchedWriteVecALU, 1, NoVLX>;
-defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8,
+defm PADDSB : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16,
+defm PADDSW : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
@@ -3645,9 +3645,9 @@ defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
SchedWriteVecALU, 0, NoVLX>;
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
SchedWriteVecALU, 0, NoVLX>;
-defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8,
+defm PSUBSB : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
-defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16,
+defm PSUBSW : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index f1ed8ca48cd..fdcd31c3f9d 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -319,8 +319,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
+ X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0),
+ X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0),
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
@@ -361,8 +361,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
+ X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
+ X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -920,8 +920,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, X86ISD::ADDS, 0),
+ X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, ISD::SADDSAT, 0),
+ X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, ISD::SADDSAT, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
@@ -1004,8 +1004,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, X86ISD::SUBS, 0),
+ X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
+ X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
@@ -1168,8 +1168,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
- X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
+ X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0),
+ X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0),
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
@@ -1191,8 +1191,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
- X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
+ X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
+ X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),
OpenPOWER on IntegriCloud