diff options
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 164 |
1 files changed, 129 insertions, 35 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index dd99b71eda6..619b13fe86e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -251,12 +251,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::UREM, MVT::i64, Expand); } - if (Subtarget.hasP9Vector()) { - setOperationAction(ISD::ABS, MVT::v4i32, Legal); - setOperationAction(ISD::ABS, MVT::v8i16, Legal); - setOperationAction(ISD::ABS, MVT::v16i8, Legal); - } - // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); @@ -556,6 +550,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); + setOperationAction(ISD::ABS, VT, Custom); // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { @@ -661,6 +656,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); + // Without hasP8Altivec set, v2i64 SMAX isn't available. + // But ABS custom lowering requires SMAX support. + if (!Subtarget.hasP8Altivec()) + setOperationAction(ISD::ABS, MVT::v2i64, Expand); + addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); @@ -1083,6 +1083,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::FSQRT); } + if (Subtarget.hasP9Altivec()) { + setTargetDAGCombine(ISD::ABS); + } + // Darwin long double math library functions have $LDBL128 appended. if (Subtarget.isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); @@ -1343,6 +1347,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::RFEBB: return "PPCISD::RFEBB"; case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; + case PPCISD::VABSD: return "PPCISD::VABSD"; case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; @@ -9003,35 +9008,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getRegister(PPC::R2, MVT::i32); } - // We are looking for absolute values here. - // The idea is to try to fit one of two patterns: - // max (a, (0-a)) OR max ((0-a), a) - if (Subtarget.hasP9Vector() && - (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw || - IntrinsicID == Intrinsic::ppc_altivec_vmaxsh || - IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) { - SDValue V1 = Op.getOperand(1); - SDValue V2 = Op.getOperand(2); - if (V1.getSimpleValueType() == V2.getSimpleValueType() && - (V1.getSimpleValueType() == MVT::v4i32 || - V1.getSimpleValueType() == MVT::v8i16 || - V1.getSimpleValueType() == MVT::v16i8)) { - if ( V1.getOpcode() == ISD::SUB && - ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) && - V1.getOperand(1) == V2 ) { - // Generate the abs instruction with the operands - return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2); - } - - if ( V2.getOpcode() == ISD::SUB && - ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) && - V2.getOperand(1) == V1 ) { - // Generate the abs instruction with the operands - return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1); - } - } - } - // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. int CompareOpc; @@ -9572,6 +9548,44 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { } } +SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { + + assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS"); + + EVT VT = Op.getValueType(); + assert(VT.isVector() && + "Only set vector abs as custom, scalar abs shouldn't reach here!"); + assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || + VT == MVT::v16i8) && + "Unexpected vector element type!"); + assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) && + "Current subtarget doesn't support smax v2i64!"); + + // For vector abs, it can be lowered to: + // abs x + // ==> + // y = -x + // smax(x, y) + + SDLoc dl(Op); + SDValue X = Op.getOperand(0); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X); + + // SMAX patch https://reviews.llvm.org/D47332 + // hasn't landed yet, so use intrinsic first here. + // TODO: Should use SMAX directly once SMAX patch landed + Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw; + if (VT == MVT::v2i64) + BifID = Intrinsic::ppc_altivec_vmaxsd; + else if (VT == MVT::v8i16) + BifID = Intrinsic::ppc_altivec_vmaxsh; + else if (VT == MVT::v16i8) + BifID = Intrinsic::ppc_altivec_vmaxsb; + + return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT); +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -9624,6 +9638,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); + case ISD::ABS: return LowerABS(Op, DAG); // For counter-based loop handling. case ISD::INTRINSIC_W_CHAIN: return SDValue(); @@ -12985,6 +13000,39 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } } + + // Combine vmaxsw/h/b(a, a's negation) to abs(a) + // Expose the vabsduw/h/b opportunity for down stream + if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() && + (IID == Intrinsic::ppc_altivec_vmaxsw || + IID == Intrinsic::ppc_altivec_vmaxsh || + IID == Intrinsic::ppc_altivec_vmaxsb)) { + SDValue V1 = N->getOperand(1); + SDValue V2 = N->getOperand(2); + if ((V1.getSimpleValueType() == MVT::v4i32 || + V1.getSimpleValueType() == MVT::v8i16 || + V1.getSimpleValueType() == MVT::v16i8) && + V1.getSimpleValueType() == V2.getSimpleValueType()) { + // (0-a, a) + if (V1.getOpcode() == ISD::SUB && + ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) && + V1.getOperand(1) == V2) { + return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2); + } + // (a, 0-a) + if (V2.getOpcode() == ISD::SUB && + ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) && + V2.getOperand(1) == V1) { + return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1); + } + // (x-y, y-x) + if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB && + V1.getOperand(0) == V2.getOperand(1) && + V1.getOperand(1) == V2.getOperand(0)) { + return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1); + } + } + } } break; @@ -13217,6 +13265,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } case ISD::BUILD_VECTOR: return DAGCombineBuildVector(N, DCI); + case ISD::ABS: + return combineABS(N, DCI); } return SDValue(); @@ -14503,3 +14553,47 @@ isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { // For non-constant masks, we can always use the record-form and. return true; } + +// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0) +// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0) +// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0) +// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0) +// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32 +SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const { + assert((N->getOpcode() == ISD::ABS) && "Need ABS node here"); + assert(Subtarget.hasP9Altivec() && + "Only combine this when P9 altivec supported!"); + EVT VT = N->getValueType(0); + if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + if (N->getOperand(0).getOpcode() == ISD::SUB) { + // Even for signed integers, if it's known to be positive (as signed + // integer) due to zero-extended inputs. + unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode(); + unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode(); + if ((SubOpcd0 == ISD::ZERO_EXTEND || + SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) && + (SubOpcd1 == ISD::ZERO_EXTEND || + SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) { + return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(), + N->getOperand(0)->getOperand(0), + N->getOperand(0)->getOperand(1), + DAG.getTargetConstant(0, dl, MVT::i32)); + } + + // For type v4i32, it can be optimized with xvnegsp + vabsduw + if (N->getOperand(0).getValueType() == MVT::v4i32 && + N->getOperand(0).hasOneUse()) { + return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(), + N->getOperand(0)->getOperand(0), + N->getOperand(0)->getOperand(1), + DAG.getTargetConstant(1, dl, MVT::i32)); + } + } + + return SDValue(); +} + |