summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-08-08 11:03:30 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2017-08-08 11:03:30 +0000
commit: ef44228acba499cba328ad7ce399be21462e584c (patch)
tree: 79b6a156ed74bec150568b59e89f59b4763521ed
parent: 055400469847495627c783c597c35253a00ad128 (diff)
download: bcm5719-llvm-ef44228acba499cba328ad7ce399be21462e584c.tar.gz
bcm5719-llvm-ef44228acba499cba328ad7ce399be21462e584c.zip
[DAGCombiner] Simplify shuffle mask index if the referenced input element is UNDEF
Fixes one of the cases in PR34041. Differential Revision: https://reviews.llvm.org/D36393 llvm-svn: 310344
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 36
-rw-r--r-- llvm/test/CodeGen/X86/avx-vbroadcast.ll 18
2 files changed, 42 insertions, 12 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4b57787699b..573002a6a4b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15112,6 +15112,38 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
}
+/// Simplify the mask of shuffle \p SVN by replacing every index that selects
+/// an UNDEF operand of a BUILD_VECTOR input with -1.
+///
+/// \p N0 and \p N1 are the two shuffle inputs. Only inputs that are
+/// BUILD_VECTOR nodes are inspected; indices into any other kind of input, and
+/// indices that are already negative, are copied through unchanged.
+///
+/// \returns a new shuffle of \p N0 and \p N1 using the simplified mask if at
+/// least one index was rewritten, otherwise an empty SDValue.
+static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
+                                   SDValue N1, SelectionDAG &DAG) {
+  // TODO - handle cases other than BUILD_VECTOR.
+  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
+  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+  if (!BV0 && !BV1)
+    return SDValue();
+
+  EVT VT = SVN->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  bool Changed = false;
+  SmallVector<int, 8> NewMask;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    int Idx = SVN->getMaskElt(i);
+    if (BV0 && 0 <= Idx && Idx < (int)NumElts &&
+        BV0->getOperand(Idx).isUndef()) {
+      // Index selects an UNDEF element of the first input.
+      Changed = true;
+      Idx = -1;
+    } else if (BV1 && Idx >= (int)NumElts &&
+               BV1->getOperand(Idx - NumElts).isUndef()) {
+      // Index selects an UNDEF element of the second input. The comparison
+      // must be inclusive: Idx == NumElts refers to element 0 of N1.
+      Changed = true;
+      Idx = -1;
+    }
+    NewMask.push_back(Idx);
+  }
+  if (Changed)
+    return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask);
+
+  return SDValue();
+}
+
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
@@ -15461,6 +15493,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}
+ // Simplify shuffle mask if a referenced element is UNDEF.
+ if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG))
+ return V;
+
// A shuffle of a single vector that is a splat can always be folded.
if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
if (N1->isUndef() && N0Shuf->isSplat())
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index ccb77a2d2c8..97035ba85b5 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -836,14 +836,12 @@ define <4 x double> @broadcast_shuffle_1000(double* %p) {
; X32-LABEL: broadcast_shuffle_1000:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vbroadcastsd (%eax), %ymm0
; X32-NEXT: retl
;
; X64-LABEL: broadcast_shuffle_1000:
; X64: ## BB#0:
-; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: vbroadcastsd (%rdi), %ymm0
; X64-NEXT: retq
%1 = load double, double* %p
%2 = insertelement <2 x double> undef, double %1, i32 0
@@ -855,18 +853,14 @@ define <4 x double> @broadcast_shuffle1032(double* %p) {
; X32-LABEL: broadcast_shuffle1032:
; X32: ## BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
-; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: broadcast_shuffle1032:
; X64: ## BB#0:
-; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X64-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%1 = load double, double* %p
%2 = insertelement <2 x double> undef, double %1, i32 1
OpenPOWER on IntegriCloud