Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp        25
-rw-r--r--  llvm/test/CodeGen/X86/sse3.ll                           12
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll          16
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll  58
4 files changed, 56 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8eef9b58c57..d82f90e7311 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1458,6 +1458,31 @@ bool TargetLowering::SimplifyDemandedVectorElts(
ZeroRHS, TLO, Depth + 1))
return true;

+ // Simplify mask using undef elements from LHS/RHS.
+ bool Updated = false;
+ bool IdentityLHS = true, IdentityRHS = true;
+ SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
+ for (int i = 0; i != NumElts; ++i) {
+ int &M = NewMask[i];
+ if (M < 0)
+ continue;
+ if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
+ (M >= (int)NumElts && UndefRHS[M - NumElts])) {
+ Updated = true;
+ M = -1;
+ }
+ IdentityLHS &= (M < 0) || (M == i);
+ IdentityRHS &= (M < 0) || ((M - NumElts) == i);
+ }
+
+ // Update legal shuffle masks based on demanded elements, unless the new mask
+ // would reduce to an identity shuffle, causing premature removal of the shuffle.
+ if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps &&
+ isShuffleMaskLegal(NewMask, VT))
+ return TLO.CombineTo(Op,
+ TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0),
+ Op.getOperand(1), NewMask));
+
// Propagate undef/zero elements from LHS/RHS.
for (unsigned i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
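To see the new logic in isolation, here is a minimal standalone sketch of the same simplification in plain C++; simplifyMask and its std::vector parameters are illustrative stand-ins rather than LLVM API, while -1 follows the SelectionDAG convention for an undef shuffle lane.

#include <cstdio>
#include <vector>

// Mirror of the hunk above: clear any mask lane that is not demanded or
// that reads an undef element of its source operand. Returns true if the
// mask changed; IdentityLHS/IdentityRHS report whether the simplified
// mask degenerated into a pass-through of one operand.
static bool simplifyMask(std::vector<int> &Mask,
                         const std::vector<bool> &DemandedElts,
                         const std::vector<bool> &UndefLHS,
                         const std::vector<bool> &UndefRHS,
                         bool &IdentityLHS, bool &IdentityRHS) {
  const int NumElts = (int)Mask.size();
  bool Updated = false;
  IdentityLHS = IdentityRHS = true;
  for (int i = 0; i != NumElts; ++i) {
    int &M = Mask[i];
    if (M < 0)
      continue;
    if (!DemandedElts[i] || (M < NumElts && UndefLHS[M]) ||
        (M >= NumElts && UndefRHS[M - NumElts])) {
      Updated = true;
      M = -1;
    }
    IdentityLHS &= (M < 0) || (M == i);
    IdentityRHS &= (M < 0) || ((M - NumElts) == i);
  }
  return Updated;
}

int main() {
  // Lane 3 is not demanded and LHS element 1 is undef, so both lanes fold.
  std::vector<int> Mask = {0, 1, 4, 5};
  std::vector<bool> Demanded = {true, true, true, false};
  std::vector<bool> UndefLHS = {false, true, false, false};
  std::vector<bool> UndefRHS(4, false);
  bool IdentityLHS, IdentityRHS;
  simplifyMask(Mask, Demanded, UndefLHS, UndefRHS, IdentityLHS, IdentityRHS);
  for (int M : Mask)
    printf("%d ", M); // prints: 0 -1 4 -1
  printf("\n");
  return 0;
}

As in the patch, the identity flags act as a guard: once the simplified mask degenerates into a pass-through of one operand, rewriting the node risks removing the shuffle prematurely, so the hunk only builds a new shuffle when neither flag is set and the mask remains legal.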
diff --git a/llvm/test/CodeGen/X86/sse3.ll b/llvm/test/CodeGen/X86/sse3.ll
index 97cc18e296f..1761567f791 100644
--- a/llvm/test/CodeGen/X86/sse3.ll
+++ b/llvm/test/CodeGen/X86/sse3.ll
@@ -396,18 +396,14 @@ entry:
define <4 x i32> @t17() nounwind {
; X86-LABEL: t17:
; X86: # %bb.0: # %entry
-; X86-NEXT: movaps (%eax), %xmm0
-; X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; X86-NEXT: xorps %xmm1, %xmm1
-; X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT: andpd {{\.LCPI.*}}, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: t17:
; X64: # %bb.0: # %entry
-; X64-NEXT: movaps (%rax), %xmm0
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; X64-NEXT: xorps %xmm1, %xmm1
-; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-NEXT: movddup {{.*#+}} xmm0 = mem[0,0]
+; X64-NEXT: andpd {{.*}}(%rip), %xmm0
; X64-NEXT: retq
entry:
%tmp1 = load <4 x float>, <4 x float>* undef, align 16
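The t17 diff above is a direct consequence of the new hunk: the second unpcklps interleaves with a zero vector and reads only lanes 0 and 1 of the inner <0,0,1,1> shuffle, so the inner mask's upper lanes become undef and the pair re-folds into a broadcast plus an AND mask. A lane-by-lane evaluation in plain C++ (symbolic lane names, purely illustrative) shows the equivalence:

#include <cstdio>

int main() {
  // Symbolic lanes of the loaded <4 x float>, plus a zero lane.
  const char *A[4] = {"a0", "a1", "a2", "a3"};
  const char *Zero = "0";

  // Inner shuffle: s1 = shuffle a, undef, <0,0,1,1>.
  int InnerMask[4] = {0, 0, 1, 1};
  const char *S1[4];
  for (int i = 0; i != 4; ++i)
    S1[i] = A[InnerMask[i]];

  // Outer shuffle: s2 = shuffle s1, zero, <0,4,1,5> (unpcklps with zero).
  // Lanes 2 and 3 of s1 are never read, so its mask is effectively <0,0,u,u>.
  int OuterMask[4] = {0, 4, 1, 5};
  const char *S2[4];
  for (int i = 0; i != 4; ++i)
    S2[i] = OuterMask[i] < 4 ? S1[OuterMask[i]] : Zero;

  // Prints "a0 0 a0 0": movddup produces {a0,a1,a0,a1} from the low
  // 64 bits, and the andpd constant then zeroes lanes 1 and 3.
  for (int i = 0; i != 4; ++i)
    printf("%s ", S2[i]);
  printf("\n");
  return 0;
}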
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index ba162e63d9f..aff1d49063f 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1563,22 +1563,22 @@ define <4 x i32> @shuffle_v4i32_2456(<4 x i32> %a, <4 x i32> %b) {
;
; SSSE3-LABEL: shuffle_v4i32_2456:
; SSSE3: # %bb.0:
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; SSSE3-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: shuffle_v4i32_2456:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,1,2]
-; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; SSE41-NEXT: palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: shuffle_v4i32_2456:
; AVX: # %bb.0:
-; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; AVX-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,2]
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,2]
+; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
; AVX-NEXT: retq
%s1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
%s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
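The shorter sequences come from composing the two shufflevectors into a single mask over (%a, %b). The sketch below (plain C++, illustrative names) performs that composition and shows the combined mask <2,4,5,6>: one trailing element of %a followed by three leading elements of %b, which is exactly the cross-operand rotate that palignr implements.

#include <cstdio>

int main() {
  int InnerMask[4] = {0, 1, 2, 2}; // %s1 = shuffle a, undef, <0,1,2,2>
  int OuterMask[4] = {3, 4, 5, 6}; // %s2 = shuffle s1, b, <3,4,5,6>
  int Combined[4];
  for (int i = 0; i != 4; ++i) {
    int M = OuterMask[i];
    // Indices 0-3 select from s1, so remap them through InnerMask into a;
    // indices 4-7 select from b and pass through unchanged.
    Combined[i] = M < 4 ? InnerMask[M] : M;
  }
  // Prints "2 4 5 6" (the 2456 in the test name): a[2] then b[0..2],
  // i.e. a rotation across the concatenated pair (a, b).
  for (int i = 0; i != 4; ++i)
    printf("%d ", Combined[i]);
  printf("\n");
  return 0;
}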
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 3ae044b0064..a2fa29ef5b2 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -973,45 +973,25 @@ define <32 x i8> @PR27320(<8 x i32> %a0) {
}

define internal fastcc <8 x float> @PR34577(<8 x float> %inp0, <8 x float> %inp1, <8 x float> %inp2) {
-; X32-AVX2-LABEL: PR34577:
-; X32-AVX2: # %bb.0: # %entry
-; X32-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X32-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; X32-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
-; X32-AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
-; X32-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; X32-AVX2-NEXT: retl
-;
-; X32-AVX512-LABEL: PR34577:
-; X32-AVX512: # %bb.0: # %entry
-; X32-AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X32-AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; X32-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; X32-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
-; X32-AVX512-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; X32-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; X32-AVX512-NEXT: retl
-;
-; X64-AVX2-LABEL: PR34577:
-; X64-AVX2: # %bb.0: # %entry
-; X64-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; X64-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; X64-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
-; X64-AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
-; X64-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512-LABEL: PR34577:
-; X64-AVX512: # %bb.0: # %entry
-; X64-AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; X64-AVX512-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
-; X64-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
-; X64-AVX512-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
-; X64-AVX512-NEXT: vpermps %ymm1, %ymm2, %ymm1
-; X64-AVX512-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
-; X64-AVX512-NEXT: retq
+; X32-LABEL: PR34577:
+; X32: # %bb.0: # %entry
+; X32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; X32-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; X32-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
+; X32-NEXT: vpermps %ymm1, %ymm2, %ymm1
+; X32-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; X32-NEXT: retl
+;
+; X64-LABEL: PR34577:
+; X64: # %bb.0: # %entry
+; X64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; X64-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; X64-NEXT: vmovaps {{.*#+}} ymm2 = <u,u,7,2,u,u,3,2>
+; X64-NEXT: vpermps %ymm1, %ymm2, %ymm1
+; X64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; X64-NEXT: retq
entry:
%shuf0 = shufflevector <8 x float> %inp0, <8 x float> %inp2, <8 x i32> <i32 1, i32 10, i32 11, i32 13, i32 2, i32 13, i32 5, i32 0>
%sel = select <8 x i1> <i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x float> %shuf0, <8 x float> zeroinitializer
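In PR34577 the select keeps only lanes 1, 2, 4 and 6 of %shuf0, and the new hunk lets that demand thin the shuffle mask before lowering. Below is a small plain-C++ sketch of the propagation (-1 marks an undef lane, as in SelectionDAG; everything else is illustrative):

#include <cstdio>

int main() {
  // %shuf0's mask over the concatenation (inp0 = 0-7, inp2 = 8-15).
  int Mask[8] = {1, 10, 11, 13, 2, 13, 5, 0};
  // Lanes that survive the select into %sel.
  bool Demanded[8] = {false, true, true, false, true, false, true, false};

  // Clear the lanes the select discards, as the new hunk does.
  for (int i = 0; i != 8; ++i)
    if (!Demanded[i])
      Mask[i] = -1;

  // Prints "-1 10 11 -1 2 -1 5 -1": only inp2[2], inp2[3], inp0[2] and
  // inp0[5] are actually needed, which fits the shorter vpermpd/vpermps
  // sequence replacing the old zero-extend based lowering.
  for (int i = 0; i != 8; ++i)
    printf("%d ", Mask[i]);
  printf("\n");
  return 0;
}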