summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 97
1 files changed, 89 insertions, 8 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 1ddc63d3200..e68012cee40 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1406,6 +1406,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
+ case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
}
return nullptr;
}
@@ -1778,10 +1779,10 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
-/// VSPLTB/VSPLTH/VSPLTW.
+/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
- assert(N->getValueType(0) == MVT::v16i8 &&
- (EltSize == 1 || EltSize == 2 || EltSize == 4));
+ assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
+ EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
// The consecutive indices need to specify an element, not part of two
// different elements. So abandon ship early if this isn't the case.
@@ -2074,10 +2075,11 @@ bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
}
-/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
-/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
-unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
- SelectionDAG &DAG) {
+/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
+/// appropriate for PPC mnemonics (which have a big endian bias - namely
+/// elements are counted from the left of the vector register).
+unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
if (DAG.getDataLayout().isLittleEndian())
@@ -8185,6 +8187,18 @@ SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
Op0.getOperand(1));
}
+// Walk from \p Op down to the load (if any) that feeds it, looking through an
+// optional BITCAST and an optional SCALAR_TO_VECTOR wrapper on the way.
+// \returns a pointer to the SDValue of that load when it is a "normal" load
+// (as classified by ISD::isNormalLoad), or nullptr when the chain does not
+// bottom out in such a load. The returned pointer aliases a node operand, so
+// it is only valid while the DAG nodes it points into are alive.
+const SDValue *getNormalLoadInput(const SDValue &Op) {
+  const SDValue *InputLoad = &Op;
+  // Look through a bitcast of the candidate value, if present.
+  if (InputLoad->getOpcode() == ISD::BITCAST)
+    InputLoad = &InputLoad->getOperand(0);
+  // Look through a scalar_to_vector wrapping the loaded scalar, if present.
+  if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR)
+    InputLoad = &InputLoad->getOperand(0);
+  if (InputLoad->getOpcode() != ISD::LOAD)
+    return nullptr;
+  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
+  // Reject anything that is not a plain load (e.g. extending or indexed
+  // loads), since callers fold this directly into load-and-splat nodes.
+  return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -8307,6 +8321,34 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
SplatBitSize > 32) {
+
+ const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
+ // Handle load-and-splat patterns as we have instructions that will do this
+ // in one go.
+ if (InputLoad && DAG.isSplatValue(Op, true)) {
+ LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
+
+ // We have handling for 4 and 8 byte elements.
+ unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
+
+ // Checking for a single use of this load, we have to check for vector
+ // width (128 bits) / ElementSize uses (since each operand of the
+      // BUILD_VECTOR is a separate use of the value).
+ if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
+ ((Subtarget.hasVSX() && ElementSize == 64) ||
+ (Subtarget.hasP9Vector() && ElementSize == 32))) {
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ DAG.getValueType(Op.getValueType()) // VT
+ };
+ return
+ DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
+ DAG.getVTList(Op.getValueType(), MVT::Other),
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+ }
+ }
+
// BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
// lowered to VSX instructions under certain conditions.
// Without VSX, there is no pattern more efficient than expanding the node.
@@ -8792,6 +8834,45 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned ShiftElts, InsertAtByte;
bool Swap = false;
+
+ // If this is a load-and-splat, we can do that with a single instruction
+ // in some cases. However if the load has multiple uses, we don't want to
+ // combine it because that will just produce multiple loads.
+ const SDValue *InputLoad = getNormalLoadInput(V1);
+ if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
+ (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
+ InputLoad->hasOneUse()) {
+ bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
+ int SplatIdx =
+ PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
+
+ LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
+ // For 4-byte load-and-splat, we need Power9.
+ if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
+ uint64_t Offset = 0;
+ if (IsFourByte)
+ Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
+ else
+ Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
+ SDValue BasePtr = LD->getBasePtr();
+ if (Offset != 0)
+ BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
+ BasePtr, DAG.getIntPtrConstant(Offset, dl));
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ BasePtr, // BasePtr
+ DAG.getValueType(Op.getValueType()) // VT
+ };
+ SDVTList VTL =
+ DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
+ SDValue LdSplt =
+ DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+ if (LdSplt.getValueType() != SVOp->getValueType(0))
+ LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
+ return LdSplt;
+ }
+ }
if (Subtarget.hasP9Vector() &&
PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
isLittleEndian)) {
@@ -8868,7 +8949,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
- int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
+ int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
OpenPOWER on IntegriCloud