summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
authorMichael Kuperstein <mkuper@google.com>2016-09-01 21:32:09 +0000
committerMichael Kuperstein <mkuper@google.com>2016-09-01 21:32:09 +0000
commit5f17d08f498167c1523136b3e62f51588769152d (patch)
tree8bc52e8b0d24f9fa1d17b95b08cdf0e7f82aeecd /llvm/lib/CodeGen
parente68b47070f617ffa8becd3c9d777a93cf2c3b6a8 (diff)
downloadbcm5719-llvm-5f17d08f498167c1523136b3e62f51588769152d.tar.gz
bcm5719-llvm-5f17d08f498167c1523136b3e62f51588769152d.zip
[SelectionDAG] Generate vector_shuffle nodes for undersized result vector sizes
Prior to this, we could generate a vector_shuffle from an IR shuffle when the size of the result was exactly the sum of the sizes of the input vectors. If the output vector was narrower - e.g. a <12 x i8> being formed by a shuffle with two <8 x i8> inputs - we would lower the shuffle to a sequence of extracts and inserts. Instead, we can form a larger vector_shuffle, and then extract a subvector of the right size - e.g. shuffle the two <8 x i8> inputs into a <16 x i8> and then extract a <12 x i8>. This also includes a target-specific X86 combine that in the presence of AVX2 combines: (vector_shuffle <mask> (concat_vectors t1, undef) (concat_vectors t2, undef)) into: (vector_shuffle <mask> (concat_vectors t1, t2), undef) in cases where this allows us to form VPERMD/VPERMQ. (This is not a separate commit, as that pattern does not appear without the DAGBuilder change.) llvm-svn: 280418
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp108
1 files changed, 63 insertions, 45 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2455a30554d..6ebafde7655 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3013,48 +3013,55 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
}
// Normalize the shuffle vector since mask and vector length don't match.
- if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
- // Mask is longer than the source vectors and is a multiple of the source
- // vectors. We can use concatenate vector to make the mask and vectors
- // lengths match.
-
- unsigned NumConcat = MaskNumElts / SrcNumElts;
-
- // Check if the shuffle is some kind of concatenation of the input vectors.
- bool IsConcat = true;
- SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
- if (Idx < 0)
- continue;
- // Ensure the indices in each SrcVT sized piece are sequential and that
- // the same source is used for the whole piece.
- if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
- (ConcatSrcs[i / SrcNumElts] >= 0 &&
- ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
- IsConcat = false;
- break;
+ if (SrcNumElts < MaskNumElts) {
+ // Mask is longer than the source vectors. We can use concatenate vector to
+ // make the mask and vectors lengths match.
+
+ if (MaskNumElts % SrcNumElts == 0) {
+ // Mask length is a multiple of the source vector length.
+ // Check if the shuffle is some kind of concatenation of the input
+ // vectors.
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ bool IsConcat = true;
+ SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < 0)
+ continue;
+ // Ensure the indices in each SrcVT sized piece are sequential and that
+ // the same source is used for the whole piece.
+ if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+ (ConcatSrcs[i / SrcNumElts] >= 0 &&
+ ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
+ IsConcat = false;
+ break;
+ }
+ // Remember which source this index came from.
+ ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
}
- // Remember which source this index came from.
- ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
- }
- // The shuffle is concatenating multiple vectors together. Just emit
- // a CONCAT_VECTORS operation.
- if (IsConcat) {
- SmallVector<SDValue, 8> ConcatOps;
- for (auto Src : ConcatSrcs) {
- if (Src < 0)
- ConcatOps.push_back(DAG.getUNDEF(SrcVT));
- else if (Src == 0)
- ConcatOps.push_back(Src1);
- else
- ConcatOps.push_back(Src2);
+ // The shuffle is concatenating multiple vectors together. Just emit
+ // a CONCAT_VECTORS operation.
+ if (IsConcat) {
+ SmallVector<SDValue, 8> ConcatOps;
+ for (auto Src : ConcatSrcs) {
+ if (Src < 0)
+ ConcatOps.push_back(DAG.getUNDEF(SrcVT));
+ else if (Src == 0)
+ ConcatOps.push_back(Src1);
+ else
+ ConcatOps.push_back(Src2);
+ }
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
+ return;
}
- setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
- return;
}
+ unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
+ unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
+ EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ PaddedMaskNumElts);
+
// Pad both vectors with undefs to make them the same length as the mask.
SDValue UndefVal = DAG.getUNDEF(SrcVT);
@@ -3063,21 +3070,32 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
MOps1[0] = Src1;
MOps2[0] = Src2;
- Src1 = Src1.isUndef() ? DAG.getUNDEF(VT)
- : DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, MOps1);
- Src2 = Src2.isUndef() ? DAG.getUNDEF(VT)
- : DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, MOps2);
+ Src1 = Src1.isUndef()
+ ? DAG.getUNDEF(PaddedVT)
+ : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
+ Src2 = Src2.isUndef()
+ ? DAG.getUNDEF(PaddedVT)
+ : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
// Readjust mask for new input vector length.
- SmallVector<int, 8> MappedOps;
+ SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
if (Idx >= (int)SrcNumElts)
- Idx -= SrcNumElts - MaskNumElts;
- MappedOps.push_back(Idx);
+ Idx -= SrcNumElts - PaddedMaskNumElts;
+ MappedOps[i] = Idx;
}
- setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
+ SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
+
+ // If the concatenated vector was padded, extract a subvector with the
+ // correct number of elements.
+ if (MaskNumElts != PaddedMaskNumElts)
+ Result = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ setValue(&I, Result);
return;
}
OpenPOWER on IntegriCloud