diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2019-04-12 16:31:56 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2019-04-12 16:31:56 +0000 |
| commit | 5e4ad39af7c257d5d426368efd1bd07f5c2be772 (patch) | |
| tree | 55412e2e5c78993415b8f7b928775fc08765a40f /llvm/lib/CodeGen | |
| parent | 7bd8c37b17730a1a6e76d5543fba0939238b7640 (diff) | |
| download | bcm5719-llvm-5e4ad39af7c257d5d426368efd1bd07f5c2be772.tar.gz bcm5719-llvm-5e4ad39af7c257d5d426368efd1bd07f5c2be772.zip | |
[DAGCombiner] narrow shuffle of concatenated vectors
// shuffle (concat X, undef), (concat Y, undef), Mask -->
// concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
The ARM changes with 'vtrn' and narrowed 'vuzp' are improvements.
The x86 changes look neutral or better. There's one test with an
extra instruction, but that could be reversed for a subtarget with
the right attributes. But by default, we want to avoid the 256-bit
op when possible (in my motivating benchmark, a handful of ymm ops
sprinkled into a sequence of xmm ops are triggering frequency
throttling on Haswell resulting in significantly worse perf).
Differential Revision: https://reviews.llvm.org/D60545
llvm-svn: 358291
Diffstat (limited to 'llvm/lib/CodeGen')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 64f35a3ad7a..9dc33aa3979 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17598,6 +17598,53 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); } +/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles +/// followed by concatenation. Narrow vector ops may have better performance +/// than wide ops, and this can unlock further narrowing of other vector ops. +/// Targets can invert this transform later if it is not profitable. +static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf, + SelectionDAG &DAG) { + SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1); + if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 || + N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 || + !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef()) + return SDValue(); + + // Split the wide shuffle mask into halves. Any mask element that is accessing + // operand 1 is offset down to account for narrowing of the vectors. + ArrayRef<int> Mask = Shuf->getMask(); + EVT VT = Shuf->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + unsigned HalfNumElts = NumElts / 2; + SmallVector<int, 16> Mask0(HalfNumElts, -1); + SmallVector<int, 16> Mask1(HalfNumElts, -1); + for (unsigned i = 0; i != NumElts; ++i) { + if (Mask[i] == -1) + continue; + int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts; + if (i < HalfNumElts) + Mask0[i] = M; + else + Mask1[i - HalfNumElts] = M; + } + + // Ask the target if this is a valid transform. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + HalfNumElts); + if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) || + !TLI.isShuffleMaskLegal(Mask1, HalfVT)) + return SDValue(); + + // shuffle (concat X, undef), (concat Y, undef), Mask --> + // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1) + SDValue X = N0.getOperand(0), Y = N1.getOperand(0); + SDLoc DL(Shuf); + SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0); + SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1); +} + // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, // or turn a shuffle of a single concat into simpler shuffle then concat. static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { @@ -18379,6 +18426,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask); } + if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG)) + return V; + return SDValue(); } |

