summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author Nikita Popov <nikita.ppv@gmail.com> 2018-12-18 13:23:03 +0000
committer Nikita Popov <nikita.ppv@gmail.com> 2018-12-18 13:23:03 +0000
commit 665ab08178baefdc6fad9da7e2503072722b71ca (patch)
tree 157bba6f6f508f6c554c6ae81dc8099c8070c09f /llvm/lib/Target
parent a7d2a235bb92b8117d0bbdb30342412e595d3ad0 (diff)
download bcm5719-llvm-665ab08178baefdc6fad9da7e2503072722b71ca.tar.gz
download bcm5719-llvm-665ab08178baefdc6fad9da7e2503072722b71ca.zip
[X86] Use UADDSAT/USUBSAT instead of ADDUS/SUBUS
Replace the X86ISD opcodes ADDUS and SUBUS with generic ISD opcodes UADDSAT and USUBSAT. As a side-effect, this also makes codegen for the @llvm.uadd.sat and @llvm.usub.sat intrinsics reasonable.

This only replaces use in the X86 backend, and does not move any of the ADDUS/SUBUS X86 specific combines into generic codegen.

Differential Revision: https://reviews.llvm.org/D55787

llvm-svn: 349481
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 95
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.h 4
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td 4
-rw-r--r-- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td 2
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td 8
5 files changed, 75 insertions, 38 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a6bb174f690..2596a04a18d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -829,6 +829,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
}
+ setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
+ setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
+ // Use widening instead of promotion.
+ for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
+ MVT::v4i16, MVT::v2i16 }) {
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
+ }
+
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
@@ -1200,6 +1211,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
+ setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
+
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
@@ -1317,6 +1333,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -1577,6 +1595,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::UADDSAT, VT, Custom);
+ setOperationAction(ISD::USUBSAT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1657,6 +1677,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::UADDSAT, VT, Legal);
+ setOperationAction(ISD::USUBSAT, VT, Legal);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -19147,7 +19169,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
break;
}
- SDValue Result = DAG.getNode(X86ISD::SUBUS, dl, VT, Op0, Op1);
+ SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1);
return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
DAG.getConstant(0, dl, VT));
}
@@ -23366,6 +23388,26 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
return split256IntArith(Op, DAG);
}
+static SDValue LowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ if (VT.getScalarType() == MVT::i1) {
+ SDLoc dl(Op);
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Expected saturated arithmetic opcode");
+ case ISD::UADDSAT:
+ return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));
+ case ISD::USUBSAT:
+ return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
+ DAG.getNOT(dl, Op.getOperand(1), VT));
+ }
+ }
+
+ assert(Op.getSimpleValueType().is256BitVector() &&
+ Op.getSimpleValueType().isInteger() &&
+ "Only handle AVX 256-bit vector integer operation");
+ return split256IntArith(Op, DAG);
+}
+
static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {
@@ -26147,6 +26189,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:
case ISD::SUB: return LowerADD_SUB(Op, DAG);
+ case ISD::UADDSAT:
+ case ISD::USUBSAT: return LowerUADDSAT_USUBSAT(Op, DAG);
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
@@ -26228,11 +26272,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
case X86ISD::VPMADDWD:
- case X86ISD::ADDUS:
- case X86ISD::SUBUS:
case X86ISD::AVG: {
- // Legalize types for X86ISD::AVG/ADDUS/SUBUS/VPMADDWD by widening.
+ // Legalize types for ISD::UADDSAT/USUBSAT and X86ISD::AVG/VPMADDWD
+ // by widening.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
EVT VT = N->getValueType(0);
@@ -26966,8 +27011,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::BLENDI: return "X86ISD::BLENDI";
case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND";
- case X86ISD::ADDUS: return "X86ISD::ADDUS";
- case X86ISD::SUBUS: return "X86ISD::SUBUS";
case X86ISD::HADD: return "X86ISD::HADD";
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
@@ -34043,9 +34086,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1);
SDValue CondRHS = Cond->getOperand(1);
- auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
- return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops);
+ auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops);
};
// Look for a general sub with unsigned saturation first.
@@ -34054,22 +34097,22 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if ((CC == ISD::SETUGE || CC == ISD::SETUGT) &&
Other->getOpcode() == ISD::SUB && OpRHS == CondRHS)
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- SUBUSBuilder);
+ USUBSATBuilder);
if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) {
if (isa<BuildVectorSDNode>(CondRHS)) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x > C-1 ? x+-C : 0 --> subus x, C
- auto MatchSUBUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
return Cond->getAPIntValue() == (-Op->getAPIntValue() - 1);
};
if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
- ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchSUBUS)) {
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT)) {
OpRHS = DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), OpRHS);
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- SUBUSBuilder);
+ USUBSATBuilder);
}
// Another special case: If C was a sign bit, the sub has been
@@ -34085,7 +34128,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Note that we have to rebuild the RHS constant here to ensure we
// don't rely on particular values of undef lanes.
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- SUBUSBuilder);
+ USUBSATBuilder);
}
}
}
@@ -34118,9 +34161,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (Other.getNode() && Other.getOpcode() == ISD::ADD) {
SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
- auto ADDUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
- return DAG.getNode(X86ISD::ADDUS, DL, Ops[0].getValueType(), Ops);
+ auto UADDSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ return DAG.getNode(ISD::UADDSAT, DL, Ops[0].getValueType(), Ops);
};
// Canonicalize condition operands.
@@ -34135,20 +34178,20 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (CC == ISD::SETULE && Other == CondRHS &&
(OpLHS == CondLHS || OpRHS == CondLHS))
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- ADDUSBuilder);
+ UADDSATBuilder);
if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) &&
CondLHS == OpLHS) {
// If the RHS is a constant we have to reverse the const
// canonicalization.
// x > ~C ? x+C : ~0 --> addus x, C
- auto MatchADDUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
+ auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
return Cond->getAPIntValue() == ~Op->getAPIntValue();
};
if (CC == ISD::SETULE &&
- ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchADDUS))
+ ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS },
- ADDUSBuilder);
+ UADDSATBuilder);
}
}
}
@@ -40764,16 +40807,16 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
} else
return SDValue();
- auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
- ArrayRef<SDValue> Ops) {
- return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops);
+ auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops);
};
// PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with
// special preprocessing in some cases.
if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64)
return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
- { SubusLHS, SubusRHS }, SUBUSBuilder);
+ { SubusLHS, SubusRHS }, USUBSATBuilder);
// Special preprocessing case can be only applied
// if the value was zero extended from 16 bit,
@@ -40805,7 +40848,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType);
SDValue Psubus =
SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType,
- { NewSubusLHS, NewSubusRHS }, SUBUSBuilder);
+ { NewSubusLHS, NewSubusRHS }, USUBSATBuilder);
// Zero extend the result, it may be used somewhere as 32 bit,
// if not zext and following trunc will shrink.
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index b3ac31f24ea..f95031d87b9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -226,10 +226,6 @@ namespace llvm {
SCALEF,
SCALEFS,
- // Integer add/sub with unsigned saturation.
- ADDUS,
- SUBUS,
-
// Integer add/sub with signed saturation.
ADDS,
SUBS,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 0ab4ed46ad5..93fdb9a130d 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4834,9 +4834,9 @@ defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
SchedWriteVecALU, HasBWI, 0>;
-defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus,
+defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
SchedWriteVecALU, HasBWI, 1>;
-defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus,
+defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat,
SchedWriteVecALU, HasBWI, 0>;
defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul,
SchedWritePMULLD, HasAVX512, 1>, T8PD;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 9ee5575f0dd..b72e6f5d59b 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -227,8 +227,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
-def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>;
-def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 472ee710310..61b9cf78d83 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3627,9 +3627,9 @@ defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PADDUSB : PDI_binop_all<0xDC, "paddusb", X86addus, v16i8, v32i8,
+defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
-defm PADDUSW : PDI_binop_all<0xDD, "paddusw", X86addus, v8i16, v16i16,
+defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>;
@@ -3649,9 +3649,9 @@ defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
-defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
+defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
-defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
+defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
OpenPOWER on IntegriCloud