Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 33
-rw-r--r--  llvm/test/CodeGen/X86/vector-zext.ll          | 51
2 files changed, 51 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5c4f98e1623..5dad16f43e0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17976,6 +17976,34 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                     Op1, Op0.getOperand(1), NewInsIndex);
}
+/// If we have a unary shuffle of a shuffle, see if it can be folded away
+/// completely. This has the potential to lose undef knowledge because the first
+/// shuffle may not have an undef mask element where the second one does. So
+/// only call this after doing simplifications based on demanded elements.
+static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
+  // shuf (shuf0 X, Y, Mask0), undef, Mask
+  auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+  if (!Shuf0 || !Shuf->getOperand(1).isUndef())
+    return SDValue();
+
+  ArrayRef<int> Mask = Shuf->getMask();
+  ArrayRef<int> Mask0 = Shuf0->getMask();
+  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
+    // Ignore undef elements.
+    if (Mask[i] == -1)
+      continue;
+    assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
+
+    // Is the element of the shuffle operand chosen by this shuffle the same as
+    // the element chosen by the shuffle operand itself?
+    if (Mask0[Mask[i]] != Mask0[i])
+      return SDValue();
+  }
+  // Every element of this shuffle is identical to the result of the previous
+  // shuffle, so we can replace this value.
+  return Shuf->getOperand(0);
+}
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();
@@ -18086,6 +18114,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  if (SimplifyDemandedVectorElts(SDValue(N, 0)))
    return SDValue(N, 0);
+  // This is intentionally placed after demanded elements simplification because
+  // it could eliminate knowledge of undef elements created by this shuffle.
+  if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
+    return ShufOp;
+
  // Match shuffles that can be converted to any_vector_extend_in_reg.
  if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
    return V;
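
Illustrative note (not part of the commit): the check above can be read as "the outer unary shuffle is redundant when, for every non-undef element i of its mask, the inner mask selects the same source element at position Mask[i] as it does at position i". Below is a minimal standalone C++ sketch of that condition, using a hypothetical helper name (outerShuffleIsRedundant) rather than the LLVM API.

// sketch.cpp - illustration only; mirrors the mask check in simplifyShuffleOfShuffle.
#include <cassert>
#include <vector>

// Can "shuf (shuf0 X, Y, Mask0), undef, Mask" be replaced by the result of the
// inner shuffle? -1 denotes an undef mask element.
static bool outerShuffleIsRedundant(const std::vector<int> &Mask,
                                    const std::vector<int> &Mask0) {
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    if (Mask[i] == -1)
      continue; // Undef lanes of the outer shuffle don't constrain the fold.
    // Lane Mask[i] and lane i of the inner shuffle must hold the same element.
    if (Mask0[Mask[i]] != Mask0[i])
      return false;
  }
  return true;
}

int main() {
  // Inner mask <0,3,0,3>; outer mask <0,1,0,1> re-picks lanes that already hold
  // the same source elements, so the outer shuffle folds away.
  assert(outerShuffleIsRedundant({0, 1, 0, 1}, {0, 3, 0, 3}));
  // Outer mask <0,-1,0,1>: the undef lane is ignored and the fold still fires,
  // which is why the combine runs only after demanded-elements simplification
  // (the undef knowledge for that lane is given up).
  assert(outerShuffleIsRedundant({0, -1, 0, 1}, {0, 3, 0, 3}));
  // Outer mask <1,0,2,3> would change lane contents (Mask0[1] = 3 != Mask0[0] = 0).
  assert(!outerShuffleIsRedundant({1, 0, 2, 3}, {0, 3, 0, 3}));
  return 0;
}
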
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 2f795fa716b..da6923c2b83 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -2617,32 +2617,23 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
define <8 x i32> @splatshuf_zext_v8i32_matching_undefs(<8 x i16> %x) {
; SSE2-LABEL: splatshuf_zext_v8i32_matching_undefs:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,0,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,5,7]
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: splatshuf_zext_v8i32_matching_undefs:
; SSSE3: # %bb.0:
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[u,u],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
-; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: splatshuf_zext_v8i32_matching_undefs:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
-; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,8,9,10,11,12,13,14,15]
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatshuf_zext_v8i32_matching_undefs:
@@ -2671,31 +2662,25 @@ define <8 x i32> @splatshuf_zext_v8i32_unmatched_undef(<8 x i16> %x) {
; SSE2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,5,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,0]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,6,5,5,4]
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: splatshuf_zext_v8i32_unmatched_undef:
; SSSE3: # %bb.0:
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
-; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: splatshuf_zext_v8i32_unmatched_undef:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
-; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,14,15,6,7,12,13,14,15]
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatshuf_zext_v8i32_unmatched_undef: