summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp20
-rw-r--r--llvm/test/CodeGen/X86/avx-vbroadcast.ll6
2 files changed, 17 insertions, 9 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 73b90666c56..b79ff7f146d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15453,7 +15453,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
-// fold shuffles if the resulting BUILD_VECTOR would have duplicate
+// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
@@ -15477,6 +15477,15 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
return SDValue();
}
+ // If both inputs are splats of the same value then we can safely merge this
+ // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
+ bool IsSplat = false;
+ auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ if (BV0 && BV1)
+ if (SDValue Splat0 = BV0->getSplatValue())
+ IsSplat = (Splat0 == BV1->getSplatValue());
+
SmallVector<SDValue, 8> Ops;
SmallSet<SDValue, 16> DuplicateOps;
for (int M : SVN->getMask()) {
@@ -15495,11 +15504,12 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
}
}
- // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
- // fine, but it's likely to generate low-quality code if the target can't
- // reconstruct an appropriate shuffle.
+ // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
+ // generating a splat; semantically, this is fine, but it's likely to
+ // generate low-quality code if the target can't reconstruct an appropriate
+ // shuffle.
if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
- if (!DuplicateOps.insert(Op).second)
+ if (!IsSplat && !DuplicateOps.insert(Op).second)
return SDValue();
Ops.push_back(Op);
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index 65e856c03c8..5dcc5a70529 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -853,14 +853,12 @@ define <4 x double> @broadcast_shuffle1032(double* %p) {
; X32-LABEL: broadcast_shuffle1032:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vbroadcastsd (%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: broadcast_shuffle1032:
; X64: ## BB#0:
-; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: vbroadcastsd (%rdi), %ymm0
; X64-NEXT: retq
%1 = load double, double* %p
%2 = insertelement <2 x double> undef, double %1, i32 1
OpenPOWER on IntegriCloud