diff options
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 97 |
1 files changed, 89 insertions, 8 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 1ddc63d3200..e68012cee40 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1406,6 +1406,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF"; + case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; } return nullptr; } @@ -1778,10 +1779,10 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to -/// VSPLTB/VSPLTH/VSPLTW. +/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.). bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { - assert(N->getValueType(0) == MVT::v16i8 && - (EltSize == 1 || EltSize == 2 || EltSize == 4)); + assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) && + EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes"); // The consecutive indices need to specify an element, not part of two // different elements. So abandon ship early if this isn't the case. @@ -2074,10 +2075,11 @@ bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, } -/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the -/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. -unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, - SelectionDAG &DAG) { +/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is +/// appropriate for PPC mnemonics (which have a big endian bias - namely +/// elements are counted from the left of the vector register). +unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, + SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); assert(isSplatShuffleMask(SVOp, EltSize)); if (DAG.getDataLayout().isLittleEndian()) @@ -8185,6 +8187,18 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { Op0.getOperand(1)); } +const SDValue *getNormalLoadInput(const SDValue &Op) { + const SDValue *InputLoad = &Op; + if (InputLoad->getOpcode() == ISD::BITCAST) + InputLoad = &InputLoad->getOperand(0); + if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR) + InputLoad = &InputLoad->getOperand(0); + if (InputLoad->getOpcode() != ISD::LOAD) + return nullptr; + LoadSDNode *LD = cast<LoadSDNode>(*InputLoad); + return ISD::isNormalLoad(LD) ? InputLoad : nullptr; +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. If we CAN select this case, and if it // selects to a single instruction, return Op. Otherwise, if we can codegen @@ -8307,6 +8321,34 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || SplatBitSize > 32) { + + const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0)); + // Handle load-and-splat patterns as we have instructions that will do this + // in one go. + if (InputLoad && DAG.isSplatValue(Op, true)) { + LoadSDNode *LD = cast<LoadSDNode>(*InputLoad); + + // We have handling for 4 and 8 byte elements. + unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits(); + + // Checking for a single use of this load, we have to check for vector + // width (128 bits) / ElementSize uses (since each operand of the + // BUILD_VECTOR is a separate use of the value. + if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) && + ((Subtarget.hasVSX() && ElementSize == 64) || + (Subtarget.hasP9Vector() && ElementSize == 32))) { + SDValue Ops[] = { + LD->getChain(), // Chain + LD->getBasePtr(), // Ptr + DAG.getValueType(Op.getValueType()) // VT + }; + return + DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, + DAG.getVTList(Op.getValueType(), MVT::Other), + Ops, LD->getMemoryVT(), LD->getMemOperand()); + } + } + // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be // lowered to VSX instructions under certain conditions. // Without VSX, there is no pattern more efficient than expanding the node. @@ -8792,6 +8834,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned ShiftElts, InsertAtByte; bool Swap = false; + + // If this is a load-and-splat, we can do that with a single instruction + // in some cases. However if the load has multiple uses, we don't want to + // combine it because that will just produce multiple loads. + const SDValue *InputLoad = getNormalLoadInput(V1); + if (InputLoad && Subtarget.hasVSX() && V2.isUndef() && + (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) && + InputLoad->hasOneUse()) { + bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4); + int SplatIdx = + PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG); + + LoadSDNode *LD = cast<LoadSDNode>(*InputLoad); + // For 4-byte load-and-splat, we need Power9. + if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) { + uint64_t Offset = 0; + if (IsFourByte) + Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4; + else + Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8; + SDValue BasePtr = LD->getBasePtr(); + if (Offset != 0) + BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), + BasePtr, DAG.getIntPtrConstant(Offset, dl)); + SDValue Ops[] = { + LD->getChain(), // Chain + BasePtr, // BasePtr + DAG.getValueType(Op.getValueType()) // VT + }; + SDVTList VTL = + DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other); + SDValue LdSplt = + DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL, + Ops, LD->getMemoryVT(), LD->getMemOperand()); + if (LdSplt.getValueType() != SVOp->getValueType(0)) + LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt); + return LdSplt; + } + } if (Subtarget.hasP9Vector() && PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, isLittleEndian)) { @@ -8868,7 +8949,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { - int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); + int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG); SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, |

