Diffstat
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  33
-rw-r--r--  llvm/test/CodeGen/X86/vector-zext.ll           51
2 files changed, 51 insertions, 33 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5c4f98e1623..5dad16f43e0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17976,6 +17976,34 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
                       Op1, Op0.getOperand(1), NewInsIndex);
 }
 
+/// If we have a unary shuffle of a shuffle, see if it can be folded away
+/// completely. This has the potential to lose undef knowledge because the first
+/// shuffle may not have an undef mask element where the second one does. So
+/// only call this after doing simplifications based on demanded elements.
+static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
+  // shuf (shuf0 X, Y, Mask0), undef, Mask
+  auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+  if (!Shuf0 || !Shuf->getOperand(1).isUndef())
+    return SDValue();
+
+  ArrayRef<int> Mask = Shuf->getMask();
+  ArrayRef<int> Mask0 = Shuf0->getMask();
+  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
+    // Ignore undef elements.
+    if (Mask[i] == -1)
+      continue;
+    assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
+
+    // Is the element of the shuffle operand chosen by this shuffle the same as
+    // the element chosen by the shuffle operand itself?
+    if (Mask0[Mask[i]] != Mask0[i])
+      return SDValue();
+  }
+  // Every element of this shuffle is identical to the result of the previous
+  // shuffle, so we can replace this value.
+  return Shuf->getOperand(0);
+}
+
 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   EVT VT = N->getValueType(0);
   unsigned NumElts = VT.getVectorNumElements();
@@ -18086,6 +18114,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
     return SDValue(N, 0);
 
+  // This is intentionally placed after demanded elements simplification because
+  // it could eliminate knowledge of undef elements created by this shuffle.
+  if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
+    return ShufOp;
+
   // Match shuffles that can be converted to any_vector_extend_in_reg.
   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
     return V;
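Not part of the patch: a minimal standalone C++ sketch of the mask check performed by simplifyShuffleOfShuffle, using plain std::vector<int> masks in place of ShuffleVectorSDNode masks (-1 marks an undef element). The helper name outerShuffleIsRedundant and the example masks are illustrative only, not taken from LLVM or from the tests below.

#include <cassert>
#include <vector>

// The outer (unary) shuffle of an inner shuffle's result is redundant if, for
// every non-undef outer element i, the inner mask selects the same source
// element at position Mask[i] as it does at position i.
static bool outerShuffleIsRedundant(const std::vector<int> &Mask0,  // inner mask
                                    const std::vector<int> &Mask) { // outer mask
  int e = (int)Mask.size();
  for (int i = 0; i != e; ++i) {
    if (Mask[i] == -1)
      continue; // undef output element: any value is acceptable here
    assert(Mask[i] >= 0 && Mask[i] < e && "unexpected mask value");
    if (Mask0[Mask[i]] != Mask0[i])
      return false;
  }
  return true;
}

int main() {
  // The outer mask only re-reads lanes whose inner selection already matches,
  // so the outer shuffle can be dropped.
  std::vector<int> Inner = {0, 3, -1, 7, 0, 3, -1, 7};
  std::vector<int> Outer = {0, 1, 2, 3, 0, 1, 2, 3};
  assert(outerShuffleIsRedundant(Inner, Outer));

  // If the inner mask differs where the outer mask is defined, no fold.
  std::vector<int> Inner2 = {0, 1, 2, 7, 0, 3, -1, 7};
  assert(!outerShuffleIsRedundant(Inner2, Outer));
  return 0;
}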
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 2f795fa716b..da6923c2b83 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -2617,32 +2617,23 @@ define <4 x i64> @splatshuf_zext_v4i64(<4 x i32> %x) {
 define <8 x i32> @splatshuf_zext_v8i32_matching_undefs(<8 x i16> %x) {
 ; SSE2-LABEL: splatshuf_zext_v8i32_matching_undefs:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,0,3,4,5,6,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,3]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,5,7]
-; SSE2-NEXT:    pxor %xmm2, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: splatshuf_zext_v8i32_matching_undefs:
 ; SSSE3:       # %bb.0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[u,u],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
-; SSSE3-NEXT:    pxor %xmm2, %xmm2
-; SSSE3-NEXT:    movdqa %xmm1, %xmm0
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: splatshuf_zext_v8i32_matching_undefs:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
-; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,8,9,10,11,12,13,14,15]
+; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: splatshuf_zext_v8i32_matching_undefs:
@@ -2671,31 +2662,25 @@ define <8 x i32> @splatshuf_zext_v8i32_unmatched_undef(<8 x i16> %x) {
 ; SSE2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,5,7]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,0]
-; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,6,5,5,4]
-; SSE2-NEXT:    pxor %xmm2, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: splatshuf_zext_v8i32_unmatched_undef:
 ; SSSE3:       # %bb.0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
-; SSSE3-NEXT:    pxor %xmm2, %xmm2
-; SSSE3-NEXT:    movdqa %xmm1, %xmm0
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: splatshuf_zext_v8i32_unmatched_undef:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
-; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,14,15,6,7,12,13,14,15]
+; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: splatshuf_zext_v8i32_unmatched_undef:
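Also not part of the patch: a small value-level model of why the fold is legal. The shuffle() helper below is an illustrative stand-in for a unary shufflevector, and the masks are chosen to mimic the "matching undefs" pattern exercised by the tests rather than copied from them. When the outer mask is undef only where the inner result is already undef, dropping the outer shuffle loses nothing; if the outer mask had extra undef elements, the fold would still be allowed but would discard that undef knowledge, which is why the combine runs only after SimplifyDemandedVectorElts.

#include <cassert>
#include <vector>

// Model a unary shuffle on concrete lanes; -1 in the mask means an undef
// output lane (its value is unspecified, so use 0 as a placeholder here).
static std::vector<int> shuffle(const std::vector<int> &Src,
                                const std::vector<int> &Mask) {
  std::vector<int> Out(Mask.size(), 0);
  for (size_t i = 0; i != Mask.size(); ++i)
    if (Mask[i] != -1)
      Out[i] = Src[Mask[i]];
  return Out;
}

int main() {
  std::vector<int> X = {10, 11, 12, 13, 14, 15, 16, 17};

  // The outer shuffle only re-reads lanes the inner shuffle already produced,
  // and its undef lanes line up with the inner shuffle's undef lanes, so the
  // composed result equals the inner result and the outer shuffle can go away.
  std::vector<int> Inner = {0, 3, -1, 7, 0, 3, -1, 7};
  std::vector<int> Outer = {0, 1, -1, 3, 4, 5, -1, 7};
  std::vector<int> InnerRes = shuffle(X, Inner);
  assert(shuffle(InnerRes, Outer) == InnerRes);
  return 0;
}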

