diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 95 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 164 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 17 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrVSX.td | 23 |
4 files changed, 169 insertions, 130 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 7d9ea93f533..6fb1b881d17 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -327,7 +327,6 @@ private: bool isOffsetMultipleOf(SDNode *N, unsigned Val) const; void transferMemOperands(SDNode *N, SDNode *Result); - MachineSDNode *flipSignBit(const SDValue &N, SDNode **SignBit = nullptr); }; } // end anonymous namespace @@ -4138,51 +4137,6 @@ void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); } -/// This method returns a node after flipping the MSB of each element -/// of vector integer type. Additionally, if SignBitVec is non-null, -/// this method sets a node with one at MSB of all elements -/// and zero at other bits in SignBitVec. -MachineSDNode * -PPCDAGToDAGISel::flipSignBit(const SDValue &N, SDNode **SignBitVec) { - SDLoc dl(N); - EVT VecVT = N.getValueType(); - if (VecVT == MVT::v4i32) { - if (SignBitVec) { - SDNode *ZV = CurDAG->getMachineNode(PPC::V_SET0, dl, MVT::v4i32); - *SignBitVec = CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, - SDValue(ZV, 0)); - } - return CurDAG->getMachineNode(PPC::XVNEGSP, dl, VecVT, N); - } - else if (VecVT == MVT::v8i16) { - SDNode *Hi = CurDAG->getMachineNode(PPC::LIS, dl, MVT::i32, - getI32Imm(0x8000, dl)); - SDNode *ScaImm = CurDAG->getMachineNode(PPC::ORI, dl, MVT::i32, - SDValue(Hi, 0), - getI32Imm(0x8000, dl)); - SDNode *VecImm = CurDAG->getMachineNode(PPC::MTVSRWS, dl, VecVT, - SDValue(ScaImm, 0)); - /* - Alternatively, we can do this as follow to use VRF instead of GPR. - vspltish 5, 1 - vspltish 6, 15 - vslh 5, 6, 5 - */ - if (SignBitVec) *SignBitVec = VecImm; - return CurDAG->getMachineNode(PPC::VADDUHM, dl, VecVT, N, - SDValue(VecImm, 0)); - } - else if (VecVT == MVT::v16i8) { - SDNode *VecImm = CurDAG->getMachineNode(PPC::XXSPLTIB, dl, MVT::i32, - getI32Imm(0x80, dl)); - if (SignBitVec) *SignBitVec = VecImm; - return CurDAG->getMachineNode(PPC::VADDUBM, dl, VecVT, N, - SDValue(VecImm, 0)); - } - else - llvm_unreachable("Unsupported vector data type for flipSignBit"); -} - // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. void PPCDAGToDAGISel::Select(SDNode *N) { @@ -4993,55 +4947,6 @@ void PPCDAGToDAGISel::Select(SDNode *N) { return; } } - case ISD::ABS: { - assert(PPCSubTarget->hasP9Vector() && "ABS is supported with P9 Vector"); - - // For vector absolute difference, we use VABSDUW instruction of POWER9. - // Since VABSDU instructions are for unsigned integers, we need adjustment - // for signed integers. - // For abs(sub(a, b)), we generate VABSDUW(a+0x80000000, b+0x80000000). - // Otherwise, abs(sub(-1, 0)) returns 0xFFFFFFFF(=-1) instead of 1. - // For abs(a), we generate VABSDUW(a+0x80000000, 0x80000000). - EVT VecVT = N->getOperand(0).getValueType(); - SDNode *AbsOp = nullptr; - unsigned AbsOpcode; - - if (VecVT == MVT::v4i32) - AbsOpcode = PPC::VABSDUW; - else if (VecVT == MVT::v8i16) - AbsOpcode = PPC::VABSDUH; - else if (VecVT == MVT::v16i8) - AbsOpcode = PPC::VABSDUB; - else - llvm_unreachable("Unsupported vector data type for ISD::ABS"); - - // Even for signed integers, we can skip adjustment if all values are - // known to be positive (as signed integer) due to zero-extended inputs. - if (N->getOperand(0).getOpcode() == ISD::SUB && - N->getOperand(0)->getOperand(0).getOpcode() == ISD::ZERO_EXTEND && - N->getOperand(0)->getOperand(1).getOpcode() == ISD::ZERO_EXTEND) { - AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, - SDValue(N->getOperand(0)->getOperand(0)), - SDValue(N->getOperand(0)->getOperand(1))); - ReplaceNode(N, AbsOp); - return; - } - if (N->getOperand(0).getOpcode() == ISD::SUB) { - SDValue SubVal = N->getOperand(0); - SDNode *Op0 = flipSignBit(SubVal->getOperand(0)); - SDNode *Op1 = flipSignBit(SubVal->getOperand(1)); - AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, - SDValue(Op0, 0), SDValue(Op1, 0)); - } - else { - SDNode *Op1 = nullptr; - SDNode *Op0 = flipSignBit(N->getOperand(0), &Op1); - AbsOp = CurDAG->getMachineNode(AbsOpcode, dl, VecVT, SDValue(Op0, 0), - SDValue(Op1, 0)); - } - ReplaceNode(N, AbsOp); - return; - } } SelectCode(N); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index dd99b71eda6..619b13fe86e 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -251,12 +251,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::UREM, MVT::i64, Expand); } - if (Subtarget.hasP9Vector()) { - setOperationAction(ISD::ABS, MVT::v4i32, Legal); - setOperationAction(ISD::ABS, MVT::v8i16, Legal); - setOperationAction(ISD::ABS, MVT::v16i8, Legal); - } - // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); @@ -556,6 +550,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); + setOperationAction(ISD::ABS, VT, Custom); // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { @@ -661,6 +656,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); + // Without hasP8Altivec set, v2i64 SMAX isn't available. + // But ABS custom lowering requires SMAX support. + if (!Subtarget.hasP8Altivec()) + setOperationAction(ISD::ABS, MVT::v2i64, Expand); + addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); @@ -1083,6 +1083,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::FSQRT); } + if (Subtarget.hasP9Altivec()) { + setTargetDAGCombine(ISD::ABS); + } + // Darwin long double math library functions have $LDBL128 appended. if (Subtarget.isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); @@ -1343,6 +1347,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::RFEBB: return "PPCISD::RFEBB"; case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; + case PPCISD::VABSD: return "PPCISD::VABSD"; case PPCISD::QVFPERM: return "PPCISD::QVFPERM"; case PPCISD::QVGPCI: return "PPCISD::QVGPCI"; case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI"; @@ -9003,35 +9008,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getRegister(PPC::R2, MVT::i32); } - // We are looking for absolute values here. - // The idea is to try to fit one of two patterns: - // max (a, (0-a)) OR max ((0-a), a) - if (Subtarget.hasP9Vector() && - (IntrinsicID == Intrinsic::ppc_altivec_vmaxsw || - IntrinsicID == Intrinsic::ppc_altivec_vmaxsh || - IntrinsicID == Intrinsic::ppc_altivec_vmaxsb)) { - SDValue V1 = Op.getOperand(1); - SDValue V2 = Op.getOperand(2); - if (V1.getSimpleValueType() == V2.getSimpleValueType() && - (V1.getSimpleValueType() == MVT::v4i32 || - V1.getSimpleValueType() == MVT::v8i16 || - V1.getSimpleValueType() == MVT::v16i8)) { - if ( V1.getOpcode() == ISD::SUB && - ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) && - V1.getOperand(1) == V2 ) { - // Generate the abs instruction with the operands - return DAG.getNode(ISD::ABS, dl, V2.getValueType(),V2); - } - - if ( V2.getOpcode() == ISD::SUB && - ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) && - V2.getOperand(1) == V1 ) { - // Generate the abs instruction with the operands - return DAG.getNode(ISD::ABS, dl, V1.getValueType(),V1); - } - } - } - // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. int CompareOpc; @@ -9572,6 +9548,44 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { } } +SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { + + assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS"); + + EVT VT = Op.getValueType(); + assert(VT.isVector() && + "Only set vector abs as custom, scalar abs shouldn't reach here!"); + assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || + VT == MVT::v16i8) && + "Unexpected vector element type!"); + assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) && + "Current subtarget doesn't support smax v2i64!"); + + // For vector abs, it can be lowered to: + // abs x + // ==> + // y = -x + // smax(x, y) + + SDLoc dl(Op); + SDValue X = Op.getOperand(0); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X); + + // SMAX patch https://reviews.llvm.org/D47332 + // hasn't landed yet, so use intrinsic first here. + // TODO: Should use SMAX directly once SMAX patch landed + Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw; + if (VT == MVT::v2i64) + BifID = Intrinsic::ppc_altivec_vmaxsd; + else if (VT == MVT::v8i16) + BifID = Intrinsic::ppc_altivec_vmaxsh; + else if (VT == MVT::v16i8) + BifID = Intrinsic::ppc_altivec_vmaxsb; + + return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT); +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -9624,6 +9638,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); + case ISD::ABS: return LowerABS(Op, DAG); // For counter-based loop handling. case ISD::INTRINSIC_W_CHAIN: return SDValue(); @@ -12985,6 +13000,39 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } } + + // Combine vmaxsw/h/b(a, a's negation) to abs(a) + // Expose the vabsduw/h/b opportunity for down stream + if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() && + (IID == Intrinsic::ppc_altivec_vmaxsw || + IID == Intrinsic::ppc_altivec_vmaxsh || + IID == Intrinsic::ppc_altivec_vmaxsb)) { + SDValue V1 = N->getOperand(1); + SDValue V2 = N->getOperand(2); + if ((V1.getSimpleValueType() == MVT::v4i32 || + V1.getSimpleValueType() == MVT::v8i16 || + V1.getSimpleValueType() == MVT::v16i8) && + V1.getSimpleValueType() == V2.getSimpleValueType()) { + // (0-a, a) + if (V1.getOpcode() == ISD::SUB && + ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) && + V1.getOperand(1) == V2) { + return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2); + } + // (a, 0-a) + if (V2.getOpcode() == ISD::SUB && + ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) && + V2.getOperand(1) == V1) { + return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1); + } + // (x-y, y-x) + if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB && + V1.getOperand(0) == V2.getOperand(1) && + V1.getOperand(1) == V2.getOperand(0)) { + return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1); + } + } + } } break; @@ -13217,6 +13265,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } case ISD::BUILD_VECTOR: return DAGCombineBuildVector(N, DCI); + case ISD::ABS: + return combineABS(N, DCI); } return SDValue(); @@ -14503,3 +14553,47 @@ isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { // For non-constant masks, we can always use the record-form and. return true; } + +// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0) +// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0) +// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0) +// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0) +// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32 +SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const { + assert((N->getOpcode() == ISD::ABS) && "Need ABS node here"); + assert(Subtarget.hasP9Altivec() && + "Only combine this when P9 altivec supported!"); + EVT VT = N->getValueType(0); + if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + if (N->getOperand(0).getOpcode() == ISD::SUB) { + // Even for signed integers, if it's known to be positive (as signed + // integer) due to zero-extended inputs. + unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode(); + unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode(); + if ((SubOpcd0 == ISD::ZERO_EXTEND || + SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) && + (SubOpcd1 == ISD::ZERO_EXTEND || + SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) { + return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(), + N->getOperand(0)->getOperand(0), + N->getOperand(0)->getOperand(1), + DAG.getTargetConstant(0, dl, MVT::i32)); + } + + // For type v4i32, it can be optimized with xvnegsp + vabsduw + if (N->getOperand(0).getValueType() == MVT::v4i32 && + N->getOperand(0).hasOneUse()) { + return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(), + N->getOperand(0)->getOperand(0), + N->getOperand(0)->getOperand(1), + DAG.getTargetConstant(1, dl, MVT::i32)); + } + } + + return SDValue(); +} + diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 7a6c22fb9cf..d9884dd24b7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -373,6 +373,21 @@ namespace llvm { /// An SDNode for swaps that are not associated with any loads/stores /// and thereby have no chain. SWAP_NO_CHAIN, + + /// An SDNode for Power9 vector absolute value difference. + /// operand #0 vector + /// operand #1 vector + /// operand #2 constant i32 0 or 1, to indicate whether needs to patch + /// the most significant bit for signed i32 + /// + /// Power9 VABSD* instructions are designed to support unsigned integer + /// vectors (byte/halfword/word), if we want to make use of them for signed + /// integer vectors, we have to flip their sign bits first. To flip sign bit + /// for byte/halfword integer vector would become inefficient, but for word + /// integer vector, we can leverage XVNEGSP to make it efficiently. eg: + /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000) + /// => VABSDUW((XVNEGSP a), (XVNEGSP b)) + VABSD, /// QVFPERM = This corresponds to the QPX qvfperm instruction. QVFPERM, @@ -998,6 +1013,7 @@ namespace llvm { SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; @@ -1101,6 +1117,7 @@ namespace llvm { SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const; /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces /// SETCC with integer subtraction when (1) there is a legal way of doing it diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index bae25ad830c..8321d7f2ecf 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -67,6 +67,10 @@ def SDT_PPCxxswapd : SDTypeProfile<1, 1, [ def SDTVecConv : SDTypeProfile<1, 2, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2> ]>; +def SDTVabsd : SDTypeProfile<1, 3, [ + SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32> +]>; + def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; @@ -79,6 +83,7 @@ def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>; def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>; def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>; +def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>; multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase, string asmstr, InstrItinClass itin, Intrinsic Int, @@ -4017,3 +4022,21 @@ let AddedComplexity = 400 in { } } +// Put this P9Altivec related definition here since it's possible to be +// selected to VSX instruction xvnegsp, avoid possible undef. +let Predicates = [HasP9Altivec] in { + + def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))), + (v4i32 (VABSDUW $A, $B))>; + + def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))), + (v8i16 (VABSDUH $A, $B))>; + + def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))), + (v16i8 (VABSDUB $A, $B))>; + + // As PPCVABSD description, the last operand indicates whether do the + // sign bit flip. + def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))), + (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>; +} |