|  |  |  |
|---|---|---|
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-04-03 21:06:51 +0000 |
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-04-03 21:06:51 +0000 |
| commit | af33757b5dec5f99bc78f724a2eb2cd822c14b73 (patch) | |
| tree | b79baa363e9ca2afbdb8470ff7b522eba00312f8 /llvm/lib | |
| parent | 3b392bb8d85d3cd6cf265e940884394d5f25d641 (diff) | |
[X86][SSE] Lower BUILD_VECTOR with repeated elts as BUILD_VECTOR + VECTOR_SHUFFLE
Transferring values from the GPRs to the XMM registers can be costly and can prevent loads from being merged.
This patch splits vXi16/vXi32/vXi64 BUILD_VECTORs that use the same operand in multiple elements into a BUILD_VECTOR with only a single insertion of each of those elements, followed by a unary shuffle that duplicates the values.
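For illustration only (this sketch is not part of the commit, models operands as plain strings instead of SelectionDAG nodes, and omits the patch's special cases for zeros and scalar widths below 16 bits), the splitting step amounts to keeping the first occurrence of each operand and recording the repeats in a shuffle mask:

```cpp
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

// Illustrative sketch only. Operands are modelled as strings, with ""
// standing in for undef; -1 marks an undef lane in the shuffle mask.
struct RepeatedEltsSplit {
  std::vector<std::string> UniqueOps; // repeats replaced by undef ("")
  std::vector<int> Mask;              // unary shuffle mask recreating repeats
};

static RepeatedEltsSplit splitRepeatedElts(const std::vector<std::string> &Ops) {
  RepeatedEltsSplit R;
  R.UniqueOps = Ops;
  R.Mask.assign(Ops.size(), -1);
  for (std::size_t i = 0; i != Ops.size(); ++i) {
    if (R.UniqueOps[i].empty()) // already marked undef by an earlier repeat
      continue;
    R.Mask[i] = static_cast<int>(i);
    // Any later copy of this operand is dropped from the build vector and
    // reproduced by the shuffle mask instead.
    for (std::size_t j = i + 1; j != Ops.size(); ++j)
      if (Ops[i] == R.UniqueOps[j]) {
        R.Mask[j] = static_cast<int>(i);
        R.UniqueOps[j].clear();
      }
  }
  return R;
}

int main() {
  // A v4i32-style <a, b, a, b>: only a and b are inserted; the unary shuffle
  // with mask <0, 1, 0, 1> duplicates them into the remaining lanes.
  RepeatedEltsSplit R = splitRepeatedElts({"a", "b", "a", "b"});
  assert((R.Mask == std::vector<int>{0, 1, 0, 1}));
  assert(R.UniqueOps[2].empty() && R.UniqueOps[3].empty());
  return 0;
}
```

In other words, a build vector <a, b, a, b> becomes a build vector <a, b, undef, undef> plus a unary shuffle with mask <0, 1, 0, 1>, so each scalar crosses from a GPR into the vector unit only once.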
This patch unearths a couple of minor regressions due to some missing MOVDDUP/BROADCAST folds, which I will address in a future patch.
Note: Now that vector shuffle lowering and combining are pretty good, we should be reusing them instead of duplicating so much in LowerBUILD_VECTOR - this is the first of several patches to address this.
Differential Revision: https://reviews.llvm.org/D31373
llvm-svn: 299387
Diffstat (limited to 'llvm/lib')
|  |  |  |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 56 |

1 file changed, 55 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fc39d9bff71..50cd8ab1dbc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6120,6 +6120,54 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Attempt to lower a build vector of repeated elts as a build vector of unique
+// ops followed by a shuffle.
+static SDValue
+lowerBuildVectorWithRepeatedEltsUsingShuffle(SDValue V, SelectionDAG &DAG,
+                                             const X86Subtarget &Subtarget) {
+  MVT VT = V.getSimpleValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  // TODO - vXi8 insertions+shuffles often cause PSHUFBs which can lead to
+  // excessive/bulky shuffle mask creation.
+  if (VT.getScalarSizeInBits() < 16)
+    return SDValue();
+
+  // Create list of unique operands to be passed to a build vector and a shuffle
+  // mask describing the repetitions.
+  // TODO - we currently insert the first occurances in place - sometimes it
+  // might be better to insert them in other locations for shuffle efficiency.
+  bool HasRepeatedElts = false;
+  SmallVector<int, 16> Mask(NumElts, SM_SentinelUndef);
+  SmallVector<SDValue, 16> Uniques(V->op_begin(), V->op_end());
+  for (unsigned i = 0; i != NumElts; ++i) {
+    SDValue Op = Uniques[i];
+    if (Op.isUndef())
+      continue;
+    Mask[i] = i;
+
+    // Zeros can be efficiently repeated, so don't shuffle these.
+    if (X86::isZeroNode(Op))
+      continue;
+
+    // If any repeated operands are found then mark the build vector entry as
+    // undef and setup a copy in the shuffle mask.
+    for (unsigned j = i + 1; j != NumElts; ++j)
+      if (Op == Uniques[j]) {
+        HasRepeatedElts = true;
+        Mask[j] = i;
+        Uniques[j] = DAG.getUNDEF(VT.getScalarType());
+      }
+  }
+
+  if (!HasRepeatedElts)
+    return SDValue();
+
+  SDLoc DL(V);
+  return DAG.getVectorShuffle(VT, DL, DAG.getBuildVector(VT, DL, Uniques),
+                              DAG.getUNDEF(VT), Mask);
+}
+
 /// Custom lower build_vector of v16i8.
 static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
                                      unsigned NumNonZero, unsigned NumZero,
@@ -7752,11 +7800,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   if (IsAllConstants)
     return SDValue();
 
-  // See if we can use a vector load to get all of the elements.
   if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) {
+    // See if we can use a vector load to get all of the elements.
     SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
     if (SDValue LD = EltsFromConsecutiveLoads(VT, Ops, dl, DAG, false))
       return LD;
+
+    // Attempt to lower a build vector of repeated elts as single insertions
+    // followed by a shuffle.
+    if (SDValue V =
+            lowerBuildVectorWithRepeatedEltsUsingShuffle(Op, DAG, Subtarget))
+      return V;
   }
 
   // For AVX-length vectors, build the individual 128-bit pieces and use