summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
diff options
context:
space:
mode:
author: Eli Friedman <efriedma@codeaurora.org> 2016-12-15 21:36:59 +0000
committer: Eli Friedman <efriedma@codeaurora.org> 2016-12-15 21:36:59 +0000
commit: 34505083c6530bccfad189a37f9beaac3b442981 (patch)
tree: 2bbad55401ae052756778c812abea1b14f09b8b4 /llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
parent: 64c01f7bef8d65716f12e7556238ea8f8c2c010e (diff)
download: bcm5719-llvm-34505083c6530bccfad189a37f9beaac3b442981.tar.gz
          bcm5719-llvm-34505083c6530bccfad189a37f9beaac3b442981.zip
Don't combine a shuffle of two BUILD_VECTORs with duplicate elements.
Targets can't handle this case well in general; we often transform a shuffle of two cheap BUILD_VECTORs to element-by-element insertion, which is very inefficient.

Fixes https://llvm.org/bugs/show_bug.cgi?id=31364. Partially fixes https://llvm.org/bugs/show_bug.cgi?id=31301.

Differential Revision: https://reviews.llvm.org/D27787
llvm-svn: 289874
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp')
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33
1 files changed, 23 insertions, 10 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c38a420b7bf..2e9b1e88586 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14076,16 +14076,20 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
-// This combine is done in the following cases:
-// 1. Both N0,N1 are BUILD_VECTOR's composed of constants or undefs.
-// 2. Only one of N0,N1 is a BUILD_VECTOR composed of constants or undefs -
-// Combine iff that node is ALL_ZEROS. We prefer not to combine a
-// BUILD_VECTOR of all constants to allow efficient materialization of
-// constant vectors, but the ALL_ZEROS is an exception because
-// zero-extension matching seems to rely on having BUILD_VECTOR nodes with
-// zero padding between elements. FIXME: Eliminate this exception for
-// ALL_ZEROS constant vectors.
-// 3. Neither N0,N1 are composed of only constants.
+//
+// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
+// a simplification in some sense, but it isn't appropriate in general: some
+// BUILD_VECTORs are substantially cheaper than others. The general case
+// of a BUILD_VECTOR requires inserting each element individually (or
+// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
+// all constants is a single constant pool load. A BUILD_VECTOR where each
+// element is identical is a splat. A BUILD_VECTOR where most of the operands
+// are undef lowers to a small number of element insertions.
+//
+// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
+// We don't fold shuffles where one side is a non-zero constant, and we don't
+// fold shuffles if the resulting BUILD_VECTOR would have duplicate
+// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
const TargetLowering &TLI) {
@@ -14108,6 +14112,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
}
SmallVector<SDValue, 8> Ops;
+ SmallSet<SDValue, 16> DuplicateOps;
for (int M : SVN->getMask()) {
SDValue Op = DAG.getUNDEF(VT.getScalarType());
if (M >= 0) {
@@ -14123,6 +14128,14 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
return SDValue();
}
}
+
+ // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
+ // fine, but it's likely to generate low-quality code if the target can't
+ // reconstruct an appropriate shuffle.
+ if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
+ if (!DuplicateOps.insert(Op).second)
+ return SDValue();
+
Ops.push_back(Op);
}
// BUILD_VECTOR requires all inputs to be of the same type, find the
OpenPOWER on IntegriCloud