Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp        | 25
-rw-r--r--  llvm/test/CodeGen/X86/sse3.ll                           | 12
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll          | 16
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll  | 58
4 files changed, 56 insertions, 55 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8eef9b58c57..d82f90e7311 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1458,6 +1458,31 @@ bool TargetLowering::SimplifyDemandedVectorElts(
                                    ZeroRHS, TLO, Depth + 1))
       return true;
 
+    // Simplify mask using undef elements from LHS/RHS.
+    bool Updated = false;
+    bool IdentityLHS = true, IdentityRHS = true;
+    SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
+    for (int i = 0; i != NumElts; ++i) {
+      int &M = NewMask[i];
+      if (M < 0)
+        continue;
+      if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
+          (M >= (int)NumElts && UndefRHS[M - NumElts])) {
+        Updated = true;
+        M = -1;
+      }
+      IdentityLHS &= (M < 0) || (M == i);
+      IdentityRHS &= (M < 0) || ((M - NumElts) == i);
+    }
+
+    // Update legal shuffle masks based on demanded elements if it won't reduce
+    // to Identity which can cause premature removal of the shuffle mask.
+    if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps &&
+        isShuffleMaskLegal(NewMask, VT))
+      return TLO.CombineTo(Op,
+                           TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0),
+                                                    Op.getOperand(1), NewMask));
+
     // Propagate undef/zero elements from LHS/RHS.
     for (unsigned i = 0; i != NumElts; ++i) {
       int M = ShuffleMask[i];
diff --git a/llvm/test/CodeGen/X86/sse3.ll b/llvm/test/CodeGen/X86/sse3.ll
index 97cc18e296f..1761567f791 100644
--- a/llvm/test/CodeGen/X86/sse3.ll
+++ b/llvm/test/CodeGen/X86/sse3.ll
@@ -396,18 +396,14 @@ entry:
 define <4 x i32> @t17() nounwind {
 ; X86-LABEL: t17:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    movaps (%eax), %xmm0
-; X86-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; X86-NEXT:    xorps %xmm1, %xmm1
-; X86-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    andpd {{\.LCPI.*}}, %xmm0
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: t17:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movaps (%rax), %xmm0
-; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; X64-NEXT:    xorps %xmm1, %xmm1
-; X64-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
+; X64-NEXT:    andpd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
 entry:
   %tmp1 = load <4 x float>, <4 x float>* undef, align 16
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index ba162e63d9f..aff1d49063f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1563,22 +1563,22 @@ define <4 x i32> @shuffle_v4i32_2456(<4 x i32> %a, <4 x i32> %b) {
 ;
 ; SSSE3-LABEL: shuffle_v4i32_2456:
 ; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
-; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v4i32_2456:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,0,1,2]
-; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v4i32_2456:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,2]
-; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
 ; AVX-NEXT:    retq
   %s1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
   %s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 3ae044b0064..a2fa29ef5b2 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -973,45 +973,25 @@ define <32 x i8> @PR27320(<8 x i32> %a0) {
 }
 
 define internal fastcc <8 x float> @PR34577(<8 x float> %inp0, <8 x float> %inp1, <8 x float> %inp2) {
-; X32-AVX2-LABEL: PR34577:
-; X32-AVX2:       # %bb.0: # %entry
-; X32-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X32-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; X32-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
-; X32-AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
-; X32-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; X32-AVX2-NEXT:    retl
-;
-; X32-AVX512-LABEL: PR34577:
-; X32-AVX512:       # %bb.0: # %entry
-; X32-AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX512-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; X32-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; X32-AVX512-NEXT:    vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
-; X32-AVX512-NEXT:    vpermps %ymm1, %ymm2, %ymm1
-; X32-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; X32-AVX512-NEXT:    retl
-;
-; X64-AVX2-LABEL: PR34577:
-; X64-AVX2:       # %bb.0: # %entry
-; X64-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; X64-AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; X64-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
-; X64-AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
-; X64-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; X64-AVX2-NEXT:    retq
-;
-; X64-AVX512-LABEL: PR34577:
-; X64-AVX512:       # %bb.0: # %entry
-; X64-AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; X64-AVX512-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; X64-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; X64-AVX512-NEXT:    vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
-; X64-AVX512-NEXT:    vpermps %ymm1, %ymm2, %ymm1
-; X64-AVX512-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; X64-AVX512-NEXT:    retq
+; X32-LABEL: PR34577:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; X32-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; X32-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; X32-NEXT:    vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
+; X32-NEXT:    vpermps %ymm1, %ymm2, %ymm1
+; X32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; X32-NEXT:    retl
+;
+; X64-LABEL: PR34577:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; X64-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; X64-NEXT:    vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
+; X64-NEXT:    vpermps %ymm1, %ymm2, %ymm1
+; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; X64-NEXT:    retq
 entry:
   %shuf0 = shufflevector <8 x float> %inp0, <8 x float> %inp2, <8 x i32> <i32 1, i32 10, i32 11, i32 13, i32 2, i32 13, i32 5, i32 0>
   %sel = select <8 x i1> <i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x float> %shuf0, <8 x float> zeroinitializer
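
The substance of the TargetLowering.cpp change is the mask-clearing loop: any shuffle-mask entry that is not demanded, or that reads an element its source operand leaves undef, is reset to -1, and the shuffle is only rebuilt when the cleaned mask is still legal and has not collapsed into an identity of either operand. Below is a minimal standalone C++ sketch of that logic, not the LLVM code itself: std::vector stands in for SmallVector and the APInt bit vectors, and the SimplifiedMask/simplifyShuffleMask names are invented for illustration.

#include <cstdio>
#include <vector>

// Sketch of the new SimplifyDemandedVectorElts step: clear every shuffle-mask
// entry that is either not demanded or reads an undef element of its source
// operand, and record whether the result degenerates into an identity shuffle
// of either operand.
struct SimplifiedMask {
  std::vector<int> Mask;   // -1 marks an undef / don't-care lane
  bool Updated = false;    // true if any entry was cleared
  bool IdentityLHS = true; // mask now merely forwards operand 0
  bool IdentityRHS = true; // mask now merely forwards operand 1
};

static SimplifiedMask simplifyShuffleMask(const std::vector<int> &ShuffleMask,
                                          const std::vector<bool> &DemandedElts,
                                          const std::vector<bool> &UndefLHS,
                                          const std::vector<bool> &UndefRHS) {
  const int NumElts = static_cast<int>(ShuffleMask.size());
  SimplifiedMask R;
  R.Mask = ShuffleMask;
  for (int i = 0; i != NumElts; ++i) {
    int &M = R.Mask[i];
    if (M < 0)
      continue;
    // Indices 0..NumElts-1 select from the LHS, NumElts and above from the RHS.
    if (!DemandedElts[i] || (M < NumElts && UndefLHS[M]) ||
        (M >= NumElts && UndefRHS[M - NumElts])) {
      R.Updated = true;
      M = -1;
    }
    R.IdentityLHS &= (M < 0) || (M == i);
    R.IdentityRHS &= (M < 0) || ((M - NumElts) == i);
  }
  return R;
}

int main() {
  // <4 x i32> two-operand shuffle: lane 3 is not demanded and mask index 4
  // (element 0 of the RHS) is known undef, so both entries get cleared.
  SimplifiedMask R =
      simplifyShuffleMask(/*ShuffleMask=*/{2, 4, 1, 5},
                          /*DemandedElts=*/{true, true, true, false},
                          /*UndefLHS=*/{false, false, false, false},
                          /*UndefRHS=*/{true, false, false, false});
  for (int M : R.Mask)
    std::printf("%d ", M); // prints: 2 -1 1 -1
  std::printf("\nUpdated=%d IdentityLHS=%d IdentityRHS=%d\n", R.Updated,
              R.IdentityLHS, R.IdentityRHS);
  return 0;
}

In this example neither identity flag is set, so the real code would re-emit the shuffle with the simplified mask via TLO.CombineTo; if either identity held, it leaves the mask alone so later combines can remove the shuffle outright.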