diff options
author | Nikita Popov <nikita.ppv@gmail.com> | 2018-12-18 13:23:03 +0000 |
---|---|---|
committer | Nikita Popov <nikita.ppv@gmail.com> | 2018-12-18 13:23:03 +0000 |
commit | 665ab08178baefdc6fad9da7e2503072722b71ca (patch) | |
tree | 157bba6f6f508f6c554c6ae81dc8099c8070c09f /llvm/lib/Target | |
parent | a7d2a235bb92b8117d0bbdb30342412e595d3ad0 (diff) | |
download | bcm5719-llvm-665ab08178baefdc6fad9da7e2503072722b71ca.tar.gz bcm5719-llvm-665ab08178baefdc6fad9da7e2503072722b71ca.zip |
[X86] Use UADDSAT/USUBSAT instead of ADDUS/SUBUS
Replace the X86ISD opcodes ADDUS and SUBUS with generic ISD opcodes
UADDSAT and USUBSAT. As a side-effect, this also makes codegen for
the @llvm.uadd.sat and @llvm.usub.sat intrinsics reasonable.
This only replaces the uses in the X86 backend, and does not move any of
the ADDUS/SUBUS X86-specific combines into generic codegen.
Differential Revision: https://reviews.llvm.org/D55787
llvm-svn: 349481
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 95 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 8 |
5 files changed, 75 insertions, 38 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a6bb174f690..2596a04a18d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -829,6 +829,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom); } + setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal); + setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); + setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal); + setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal); + // Use widening instead of promotion. + for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8, + MVT::v4i16, MVT::v2i16 }) { + setOperationAction(ISD::UADDSAT, VT, Custom); + setOperationAction(ISD::USUBSAT, VT, Custom); + } + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); @@ -1200,6 +1211,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SMIN, MVT::v4i64, Custom); setOperationAction(ISD::UMIN, MVT::v4i64, Custom); + setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); + setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); + setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); + for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); setOperationAction(ISD::SMAX, VT, HasInt256 ? 
Legal : Custom); @@ -1317,6 +1333,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::UADDSAT, VT, Custom); + setOperationAction(ISD::USUBSAT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); @@ -1577,6 +1595,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SUB, VT, Custom); setOperationAction(ISD::MUL, VT, Custom); setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::UADDSAT, VT, Custom); + setOperationAction(ISD::USUBSAT, VT, Custom); setOperationAction(ISD::TRUNCATE, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); @@ -1657,6 +1677,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SMIN, VT, Legal); setOperationAction(ISD::UMIN, VT, Legal); setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::UADDSAT, VT, Legal); + setOperationAction(ISD::USUBSAT, VT, Legal); // The condition codes aren't legal in SSE/AVX and under AVX512 we use // setcc all the way to isel and prefer SETGT in some isel patterns. 
@@ -19147,7 +19169,7 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT, break; } - SDValue Result = DAG.getNode(X86ISD::SUBUS, dl, VT, Op0, Op1); + SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1); return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result, DAG.getConstant(0, dl, VT)); } @@ -23366,6 +23388,26 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) { return split256IntArith(Op, DAG); } +static SDValue LowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + if (VT.getScalarType() == MVT::i1) { + SDLoc dl(Op); + switch (Op.getOpcode()) { + default: llvm_unreachable("Expected saturated arithmetic opcode"); + case ISD::UADDSAT: + return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1)); + case ISD::USUBSAT: + return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), + DAG.getNOT(dl, Op.getOperand(1), VT)); + } + } + + assert(Op.getSimpleValueType().is256BitVector() && + Op.getSimpleValueType().isInteger() && + "Only handle AVX 256-bit vector integer operation"); + return split256IntArith(Op, DAG); +} + static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) { @@ -26147,6 +26189,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::ADD: case ISD::SUB: return LowerADD_SUB(Op, DAG); + case ISD::UADDSAT: + case ISD::USUBSAT: return LowerUADDSAT_USUBSAT(Op, DAG); case ISD::SMAX: case ISD::SMIN: case ISD::UMAX: @@ -26228,11 +26272,12 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } return; } + case ISD::UADDSAT: + case ISD::USUBSAT: case X86ISD::VPMADDWD: - case X86ISD::ADDUS: - case X86ISD::SUBUS: case X86ISD::AVG: { - // Legalize types for X86ISD::AVG/ADDUS/SUBUS/VPMADDWD by widening. + // Legalize types for ISD::UADDSAT/USUBSAT and X86ISD::AVG/VPMADDWD + // by widening. 
assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); EVT VT = N->getValueType(0); @@ -26966,8 +27011,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ANDNP: return "X86ISD::ANDNP"; case X86ISD::BLENDI: return "X86ISD::BLENDI"; case X86ISD::SHRUNKBLEND: return "X86ISD::SHRUNKBLEND"; - case X86ISD::ADDUS: return "X86ISD::ADDUS"; - case X86ISD::SUBUS: return "X86ISD::SUBUS"; case X86ISD::HADD: return "X86ISD::HADD"; case X86ISD::HSUB: return "X86ISD::HSUB"; case X86ISD::FHADD: return "X86ISD::FHADD"; @@ -34043,9 +34086,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, SDValue OpLHS = Other->getOperand(0), OpRHS = Other->getOperand(1); SDValue CondRHS = Cond->getOperand(1); - auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef<SDValue> Ops) { - return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops); + auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL, + ArrayRef<SDValue> Ops) { + return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops); }; // Look for a general sub with unsigned saturation first. @@ -34054,22 +34097,22 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, if ((CC == ISD::SETUGE || CC == ISD::SETUGT) && Other->getOpcode() == ISD::SUB && OpRHS == CondRHS) return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, - SUBUSBuilder); + USUBSATBuilder); if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) { if (isa<BuildVectorSDNode>(CondRHS)) { // If the RHS is a constant we have to reverse the const // canonicalization. // x > C-1 ? 
x+-C : 0 --> subus x, C - auto MatchSUBUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) { + auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) { return Cond->getAPIntValue() == (-Op->getAPIntValue() - 1); }; if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD && - ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchSUBUS)) { + ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT)) { OpRHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), OpRHS); return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, - SUBUSBuilder); + USUBSATBuilder); } // Another special case: If C was a sign bit, the sub has been @@ -34085,7 +34128,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // Note that we have to rebuild the RHS constant here to ensure we // don't rely on particular values of undef lanes. return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, - SUBUSBuilder); + USUBSATBuilder); } } } @@ -34118,9 +34161,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, if (Other.getNode() && Other.getOpcode() == ISD::ADD) { SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1); - auto ADDUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef<SDValue> Ops) { - return DAG.getNode(X86ISD::ADDUS, DL, Ops[0].getValueType(), Ops); + auto UADDSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL, + ArrayRef<SDValue> Ops) { + return DAG.getNode(ISD::UADDSAT, DL, Ops[0].getValueType(), Ops); }; // Canonicalize condition operands. @@ -34135,20 +34178,20 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, if (CC == ISD::SETULE && Other == CondRHS && (OpLHS == CondLHS || OpRHS == CondLHS)) return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, - ADDUSBuilder); + UADDSATBuilder); if (isa<BuildVectorSDNode>(OpRHS) && isa<BuildVectorSDNode>(CondRHS) && CondLHS == OpLHS) { // If the RHS is a constant we have to reverse the const // canonicalization. // x > ~C ? 
x+C : ~0 --> addus x, C - auto MatchADDUS = [](ConstantSDNode *Op, ConstantSDNode *Cond) { + auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) { return Cond->getAPIntValue() == ~Op->getAPIntValue(); }; if (CC == ISD::SETULE && - ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchADDUS)) + ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT)) return SplitOpsAndApply(DAG, Subtarget, DL, VT, { OpLHS, OpRHS }, - ADDUSBuilder); + UADDSATBuilder); } } } @@ -40764,16 +40807,16 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, } else return SDValue(); - auto SUBUSBuilder = [](SelectionDAG &DAG, const SDLoc &DL, - ArrayRef<SDValue> Ops) { - return DAG.getNode(X86ISD::SUBUS, DL, Ops[0].getValueType(), Ops); + auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL, + ArrayRef<SDValue> Ops) { + return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops); }; // PSUBUS doesn't support v8i32/v8i64/v16i32, but it can be enabled with // special preprocessing in some cases. if (VT != MVT::v8i32 && VT != MVT::v16i32 && VT != MVT::v8i64) return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, - { SubusLHS, SubusRHS }, SUBUSBuilder); + { SubusLHS, SubusRHS }, USUBSATBuilder); // Special preprocessing case can be only applied // if the value was zero extended from 16 bit, @@ -40805,7 +40848,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, SDValue NewSubusRHS = DAG.getZExtOrTrunc(UMin, SDLoc(SubusRHS), ShrinkedType); SDValue Psubus = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), ShrinkedType, - { NewSubusLHS, NewSubusRHS }, SUBUSBuilder); + { NewSubusLHS, NewSubusRHS }, USUBSATBuilder); // Zero extend the result, it may be used somewhere as 32 bit, // if not zext and following trunc will shrink. 
return DAG.getZExtOrTrunc(Psubus, SDLoc(N), ExtType); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index b3ac31f24ea..f95031d87b9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -226,10 +226,6 @@ namespace llvm { SCALEF, SCALEFS, - // Integer add/sub with unsigned saturation. - ADDUS, - SUBUS, - // Integer add/sub with signed saturation. ADDS, SUBS, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 0ab4ed46ad5..93fdb9a130d 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4834,9 +4834,9 @@ defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds, SchedWriteVecALU, HasBWI, 1>; defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, SchedWriteVecALU, HasBWI, 0>; -defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, +defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat, SchedWriteVecALU, HasBWI, 1>; -defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, +defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", usubsat, SchedWriteVecALU, HasBWI, 0>; defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, SchedWritePMULLD, HasAVX512, 1>, T8PD; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 9ee5575f0dd..b72e6f5d59b 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -227,8 +227,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; -def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>; -def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>; def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>; def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>; def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, 
[SDNPCommutative]>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 472ee710310..61b9cf78d83 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3627,9 +3627,9 @@ defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; -defm PADDUSB : PDI_binop_all<0xDC, "paddusb", X86addus, v16i8, v32i8, +defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; -defm PADDUSW : PDI_binop_all<0xDD, "paddusw", X86addus, v8i16, v16i16, +defm PADDUSW : PDI_binop_all<0xDD, "paddusw", uaddsat, v8i16, v16i16, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; @@ -3649,9 +3649,9 @@ defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8, SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16, SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; -defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, +defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8, SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; -defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, +defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", usubsat, v8i16, v16i16, SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8, SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; |