diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 64f35a3ad7a..9dc33aa3979 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17598,6 +17598,53 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); } +/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles +/// followed by concatenation. Narrow vector ops may have better performance +/// than wide ops, and this can unlock further narrowing of other vector ops. +/// Targets can invert this transform later if it is not profitable. +static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, + SelectionDAG &DAG) { + SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1); + if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 || + N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 || + !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef()) + return SDValue(); + + // Split the wide shuffle mask into halves. Any mask element that is accessing + // operand 1 is offset down to account for narrowing of the vectors. + ArrayRef<int> Mask = Shuf->getMask(); + EVT VT = Shuf->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + unsigned HalfNumElts = NumElts / 2; + SmallVector<int, 16> Mask0(HalfNumElts, -1); + SmallVector<int, 16> Mask1(HalfNumElts, -1); + for (unsigned i = 0; i != NumElts; ++i) { + if (Mask[i] == -1) + continue; + int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts; + if (i < HalfNumElts) + Mask0[i] = M; + else + Mask1[i - HalfNumElts] = M; + } + + // Ask the target if this is a valid transform. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + HalfNumElts); + if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) || + !TLI.isShuffleMaskLegal(Mask1, HalfVT)) + return SDValue(); + + // shuffle (concat X, undef), (concat Y, undef), Mask --> + // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1) + SDValue X = N0.getOperand(0), Y = N1.getOperand(0); + SDLoc DL(Shuf); + SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0); + SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1); +} + // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, // or turn a shuffle of a single concat into simpler shuffle then concat. static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { @@ -18379,6 +18426,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask); } + if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG)) + return V; + return SDValue(); } |

