diff options
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 20 | +++++++++++++++-----
-rw-r--r-- | llvm/test/CodeGen/X86/avx-vbroadcast.ll | 6 | ++----
2 files changed, 17 insertions, 9 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 73b90666c56..b79ff7f146d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15453,7 +15453,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
 //
 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
 // We don't fold shuffles where one side is a non-zero constant, and we don't
-// fold shuffles if the resulting BUILD_VECTOR would have duplicate
+// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
 // non-constant operands. This seems to work out reasonably well in practice.
 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG,
@@ -15477,6 +15477,15 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
       return SDValue();
   }
 
+  // If both inputs are splats of the same value then we can safely merge this
+  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
+  bool IsSplat = false;
+  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
+  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+  if (BV0 && BV1)
+    if (SDValue Splat0 = BV0->getSplatValue())
+      IsSplat = (Splat0 == BV1->getSplatValue());
+
   SmallVector<SDValue, 8> Ops;
   SmallSet<SDValue, 16> DuplicateOps;
   for (int M : SVN->getMask()) {
@@ -15495,11 +15504,12 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
       }
     }
 
-    // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
-    // fine, but it's likely to generate low-quality code if the target can't
-    // reconstruct an appropriate shuffle.
+    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
+    // generating a splat; semantically, this is fine, but it's likely to
+    // generate low-quality code if the target can't reconstruct an appropriate
+    // shuffle.
     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
-      if (!DuplicateOps.insert(Op).second)
+      if (!IsSplat && !DuplicateOps.insert(Op).second)
         return SDValue();
 
     Ops.push_back(Op);
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index 65e856c03c8..5dcc5a70529 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -853,14 +853,12 @@ define <4 x double> @broadcast_shuffle1032(double* %p) {
 ; X32-LABEL: broadcast_shuffle1032:
 ; X32: ## BB#0:
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vbroadcastsd (%eax), %ymm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: broadcast_shuffle1032:
 ; X64: ## BB#0:
-; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: vbroadcastsd (%rdi), %ymm0
 ; X64-NEXT: retq
   %1 = load double, double* %p
   %2 = insertelement <2 x double> undef, double %1, i32 1