Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 66
1 file changed, 39 insertions, 27 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ec10968874e..e45ca4dbc0d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4110,6 +4110,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
 // shuffle in combination with VEXTs.
 SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
                                                   SelectionDAG &DAG) const {
+  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
   SDLoc dl(Op);
   EVT VT = Op.getValueType();
   unsigned NumElts = VT.getVectorNumElements();
@@ -4158,35 +4159,47 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
   SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
   int VEXTOffsets[2] = { 0, 0 };
+  int OffsetMultipliers[2] = { 1, 1 };
 
   // This loop extracts the usage patterns of the source vectors
   // and prepares appropriate SDValues for a shuffle if possible.
   for (unsigned i = 0; i < SourceVecs.size(); ++i) {
-    if (SourceVecs[i].getValueType() == VT) {
+    unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
+    SDValue CurSource = SourceVecs[i];
+    if (SourceVecs[i].getValueType().getVectorElementType() !=
+        VT.getVectorElementType()) {
+      // It may hit this case if SourceVecs[i] is AssertSext/AssertZext.
+      // Then bitcast it to the vector which holds asserted element type,
+      // and record the multiplier of element width between SourceVecs and
+      // Build_vector which is needed to extract the correct lanes later.
+      EVT CastVT =
+          EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+                           SourceVecs[i].getValueSizeInBits() /
+                               VT.getVectorElementType().getSizeInBits());
+
+      CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
+      OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
+      NumSrcElts *= OffsetMultipliers[i];
+      MaxElts[i] *= OffsetMultipliers[i];
+      MinElts[i] *= OffsetMultipliers[i];
+    }
+
+    if (CurSource.getValueType() == VT) {
       // No VEXT necessary
-      ShuffleSrcs[i] = SourceVecs[i];
+      ShuffleSrcs[i] = CurSource;
       VEXTOffsets[i] = 0;
       continue;
-    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+    } else if (NumSrcElts < NumElts) {
       // We can pad out the smaller vector for free, so if it's part of a
      // shuffle...
-      ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i],
-                                   DAG.getUNDEF(SourceVecs[i].getValueType()));
+      ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
+                                   DAG.getUNDEF(CurSource.getValueType()));
       continue;
     }
 
-    // Don't attempt to extract subvectors from BUILD_VECTOR sources
-    // that expand or trunc the original value.
-    // TODO: We can try to bitcast and ANY_EXTEND the result but
-    // we need to consider the cost of vector ANY_EXTEND, and the
-    // legality of all the types.
-    if (SourceVecs[i].getValueType().getVectorElementType() !=
-        VT.getVectorElementType())
-      return SDValue();
-
     // Since only 64-bit and 128-bit vectors are legal on ARM and
     // we've eliminated the other cases...
-    assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts &&
+    assert(NumSrcElts == 2 * NumElts &&
            "unexpected vector sizes in ReconstructShuffle");
 
     if (MaxElts[i] - MinElts[i] >= NumElts) {
@@ -4197,22 +4210,20 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     if (MinElts[i] >= NumElts) {
       // The extraction can just take the second half
       VEXTOffsets[i] = NumElts;
-      ShuffleSrcs[i] =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
-                      DAG.getIntPtrConstant(NumElts));
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+                                   DAG.getIntPtrConstant(NumElts));
     } else if (MaxElts[i] < NumElts) {
       // The extraction can just take the first half
       VEXTOffsets[i] = 0;
-      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
-                                   SourceVecs[i], DAG.getIntPtrConstant(0));
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+                                   DAG.getIntPtrConstant(0));
     } else {
       // An actual VEXT is needed
       VEXTOffsets[i] = MinElts[i];
-      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
-                                     SourceVecs[i], DAG.getIntPtrConstant(0));
-      SDValue VEXTSrc2 =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
-                      DAG.getIntPtrConstant(NumElts));
+      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+                                     DAG.getIntPtrConstant(0));
+      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+                                     DAG.getIntPtrConstant(NumElts));
       unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
       ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
                                    DAG.getConstant(Imm, MVT::i32));
@@ -4232,9 +4243,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     int ExtractElt =
         cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
     if (ExtractVec == SourceVecs[0]) {
-      Mask.push_back(ExtractElt - VEXTOffsets[0]);
+      Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
     } else {
-      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+      Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
+                     VEXTOffsets[1]);
     }
   }
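
For context, the index arithmetic the patch introduces can be illustrated with a small standalone C++ sketch. This is not part of the patch and not LLVM code; the helper names and the v4i32 -> v8i16 scenario below are illustrative assumptions chosen to mirror OffsetMultipliers[i] and the Mask.push_back computation.

// Standalone sketch of the lane remapping the patch performs when a
// source vector's elements are wider than the BUILD_VECTOR's elements.
// Hypothetical helpers; not LLVM API.
#include <cassert>

// Element-width ratio between the source and the BUILD_VECTOR type,
// mirroring OffsetMultipliers[i] in the patch.
unsigned offsetMultiplier(unsigned SrcEltBits, unsigned DstEltBits) {
  assert(SrcEltBits % DstEltBits == 0 && "widths must divide evenly");
  return SrcEltBits / DstEltBits;
}

// Shuffle-mask index for a lane extracted from source 0, mirroring
// Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]).
int maskIndex(int ExtractElt, unsigned Multiplier, int VEXTOffset) {
  return ExtractElt * static_cast<int>(Multiplier) - VEXTOffset;
}

int main() {
  // A v4i32 source whose elements are asserted (AssertSext/AssertZext)
  // to fit in i16, feeding a v8i16 BUILD_VECTOR: bitcasting v4i32 to
  // v8i16 gives multiplier 32 / 16 = 2, so source lane 3 maps to lane 6
  // of the bitcast vector.
  unsigned Mult = offsetMultiplier(32, 16);
  assert(Mult == 2);
  assert(maskIndex(3, Mult, /*VEXTOffset=*/0) == 6);
  return 0;
}

On a little-endian target the asserted low bits of each wide element land in the lower-numbered lane of the bitcast view, which is why a plain multiply of the extract index is sufficient here.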