diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 20 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 7 |
3 files changed, 35 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 6e0bc97e92b..4923a529c21 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -141,6 +141,7 @@ class VectorLegalizer { SDValue ExpandFunnelShift(SDValue Op); SDValue ExpandROT(SDValue Op); SDValue ExpandFMINNUM_FMAXNUM(SDValue Op); + SDValue ExpandAddSubSat(SDValue Op); SDValue ExpandStrictFPOp(SDValue Op); /// Implements vector promotion. @@ -777,6 +778,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::FMINNUM: case ISD::FMAXNUM: return ExpandFMINNUM_FMAXNUM(Op); + case ISD::USUBSAT: + case ISD::SSUBSAT: + case ISD::UADDSAT: + case ISD::SADDSAT: + return ExpandAddSubSat(Op); case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -1206,6 +1212,12 @@ SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); } +SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) { + if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG)) + return Expanded; + return DAG.UnrollVectorOp(Op.getNode()); +} + SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { EVT VT = Op.getValueType(); EVT EltVT = VT.getVectorElementType(); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3c757440367..a2f05c1e3ce 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -5277,6 +5277,22 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { unsigned Opcode = Node->getOpcode(); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + EVT VT = LHS.getValueType(); + SDLoc dl(Node); + + // usub.sat(a, b) -> umax(a, b) - b + if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) { + SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS); + return DAG.getNode(ISD::SUB, dl, VT, Max, RHS); + } + + if (VT.isVector()) { + // TODO: Consider not scalarizing here. + return SDValue(); + } + unsigned OverflowOp; switch (Opcode) { case ISD::SADDSAT: @@ -5295,11 +5311,7 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { llvm_unreachable("Expected method to receive signed or unsigned saturation " "addition or subtraction node."); } - assert(Node->getNumOperands() == 2 && "Expected node to have 2 operands."); - SDLoc dl(Node); - SDValue LHS = Node->getOperand(0); - SDValue RHS = Node->getOperand(1); assert(LHS.getValueType().isScalarInteger() && "Expected operands to be integers. Vector of int arguments should " "already be unrolled."); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index a3592565c0f..36929a4f543 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1780,6 +1780,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::CTPOP, MVT::v16i32, 24 }, { ISD::CTTZ, MVT::v8i64, 20 }, { ISD::CTTZ, MVT::v16i32, 28 }, + { ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd + { ISD::USUBSAT, MVT::v2i64, 2 }, // pmaxuq + psubq + { ISD::USUBSAT, MVT::v4i64, 2 }, // pmaxuq + psubq + { ISD::USUBSAT, MVT::v8i64, 2 }, // pmaxuq + psubq }; static const CostTblEntry XOPCostTbl[] = { { ISD::BITREVERSE, MVT::v4i64, 4 }, @@ -1823,6 +1827,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::UADDSAT, MVT::v32i8, 1 }, { ISD::USUBSAT, MVT::v16i16, 1 }, { ISD::USUBSAT, MVT::v32i8, 1 }, + { ISD::USUBSAT, MVT::v8i32, 2 }, // pmaxud + psubd { ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/ { ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/ { ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/ @@ -1858,6 +1863,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::UADDSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert { ISD::USUBSAT, MVT::v16i16, 4 }, // 2 x 128-bit Op + extract/insert { ISD::USUBSAT, MVT::v32i8, 4 }, // 2 x 128-bit Op + extract/insert + { ISD::USUBSAT, MVT::v8i32, 6 }, // 2 x 128-bit Op + extract/insert { ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/ { ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/ { ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/ @@ -1878,6 +1884,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, { ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd }; static const CostTblEntry SSE42CostTbl[] = { + { ISD::USUBSAT, MVT::v4i32, 2 }, // pmaxud + psubd { ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/ { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/ }; |