| field | value | date |
|---|---|---|
| author | Craig Topper <craig.topper@intel.com> | 2017-09-25 19:26:08 +0000 |
| committer | Craig Topper <craig.topper@intel.com> | 2017-09-25 19:26:08 +0000 |
| commit | 5bc10ede53000011f43f4943e8307304c77807b1 | |
| tree | 3c03699b9021f2abbe1729b1b59678c53aeb9f48 (llvm/test) | |
| parent | 68c730a1b22b0f1a3015b9b3f49b53f84539164a | |
[SelectionDAG] Teach simplifyDemandedBits to handle shifts by constant splat vectors
This teaches simplifyDemandedBits to handle constant splat vector shifts.
This required changing some uses of getZExtValue to getLimitedValue since we can't rely on legalization using getShiftAmountTy for the shift amount.
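As an aside on the getZExtValue/getLimitedValue distinction: getZExtValue asserts when the value has more than 64 active bits, while getLimitedValue clamps to a caller-supplied limit, so an oversized amount can still be rejected by an ordinary bound check. The sketch below is a minimal standalone illustration of that idea, not code from the patch; the 128-bit shift-amount width and the 32-bit value width are assumed purely for the example.

```cpp
// Hypothetical illustration only -- not the patch itself. Shows why a shift
// amount read from a not-yet-legalized (possibly very wide) type is safer to
// extract with getLimitedValue than with getZExtValue.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

using namespace llvm;

int main() {
  const unsigned BitWidth = 32; // width of the value being shifted (assumed)

  // A splatted shift amount stored in a 128-bit element type, as it might
  // appear before legalization forces it into getShiftAmountTy.
  APInt ShAmt(128, 5);

  // ShAmt.getZExtValue() would assert if more than 64 bits were active.
  // getLimitedValue clamps at BitWidth, so the usual "amount too large"
  // check below still fires for out-of-range values.
  uint64_t Amt = ShAmt.getLimitedValue(BitWidth);
  if (Amt >= BitWidth) {
    outs() << "shift amount out of range, no simplification\n";
    return 0;
  }

  outs() << "usable shift amount: " << Amt << "\n";
  return 0;
}
```

The real checks live in TargetLowering::SimplifyDemandedBits and operate on SDValue shift amounts rather than a bare APInt; the sketch only mirrors the clamp-then-check pattern.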
I believe there may have been a bug in the ((X << C1) >>u ShAmt) handling where we didn't check if the inner shift was too large. I've fixed that here.
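The guard matters because a shift by an amount greater than or equal to the bit width produces an undefined result, so the fold is only valid when both amounts are in range. The following self-contained check (plain 32-bit integers, an illustrative helper name, and an exhaustive loop, none of which come from the patch) demonstrates the value-level identity behind ((X << C1) >>u C2) under that guard.

```cpp
// Illustrative only: value-level identity behind the ((X << C1) >>u C2)
// fold, with the guard that both shift amounts stay below the bit width.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Folded form: a single shift plus a mask that removes the bits the original
// pair of shifts would have discarded.
static uint32_t foldShlLshr(uint32_t X, unsigned C1, unsigned C2) {
  assert(C1 < 32 && C2 < 32 && "out-of-range shifts must not be folded");
  uint32_t Mask = UINT32_MAX >> C2;
  return C1 >= C2 ? (X << (C1 - C2)) & Mask : (X >> (C2 - C1)) & Mask;
}

int main() {
  const uint32_t X = 0x12345678u;
  // Exhaustively confirm the folded form matches the two-shift form for all
  // in-range shift amounts.
  for (unsigned C1 = 0; C1 < 32; ++C1)
    for (unsigned C2 = 0; C2 < 32; ++C2)
      assert(foldShlLshr(X, C1, C2) == ((X << C1) >> C2));
  std::puts("folded form matches the two-shift form for all in-range amounts");
  return 0;
}
```

In the DAG code the analogous out-of-range case simply bails out of the simplification; the assert here only documents the precondition of the rewrite.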
I had to add new patterns to ARM because the zext/sext that the patterns were looking for gets turned into an any_extend by this patch. I'm happy to split that out, but I'm not sure how to test it without this change.
Differential Revision: https://reviews.llvm.org/D37665
llvm-svn: 314139
Diffstat (limited to 'llvm/test')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/ARM/vshll.ll | 2 |
| -rw-r--r-- | llvm/test/CodeGen/X86/combine-shl.ll | 11 |
| -rw-r--r-- | llvm/test/CodeGen/X86/not-and-simplify.ll | 4 |
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-vector-shifts.ll | 1 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-blend.ll | 4 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-rotate-128.ll | 8 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-rotate-256.ll | 6 |
| -rw-r--r-- | llvm/test/CodeGen/X86/widen_cast-4.ll | 2 |
8 files changed, 12 insertions, 26 deletions
```diff
diff --git a/llvm/test/CodeGen/ARM/vshll.ll b/llvm/test/CodeGen/ARM/vshll.ll
index a8230134d91..61de4fa9db8 100644
--- a/llvm/test/CodeGen/ARM/vshll.ll
+++ b/llvm/test/CodeGen/ARM/vshll.ll
@@ -97,7 +97,7 @@ define <8 x i16> @vshllu8_bad(<8 x i8>* %A) nounwind {
 
 define <4 x i32> @vshlls16_bad(<4 x i16>* %A) nounwind {
 ; CHECK-LABEL: vshlls16_bad:
-; CHECK: vmovl.s16
+; CHECK: vmovl.u16
 ; CHECK: vshl.i32
 %tmp1 = load <4 x i16>, <4 x i16>* %A
 %sext = sext <4 x i16> %tmp1 to <4 x i32>
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index fbddd3c7532..0d130dc0ee8 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -193,17 +193,16 @@ define <4 x i32> @combine_vec_shl_shl_zero1(<4 x i32> %x) {
 define <8 x i32> @combine_vec_shl_ext_shl0(<8 x i16> %x) {
 ; SSE-LABEL: combine_vec_shl_ext_shl0:
 ; SSE: # BB#0:
-; SSE-NEXT: pmovsxwd %xmm0, %xmm2
-; SSE-NEXT: pslld $20, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE-NEXT: pmovsxwd %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE-NEXT: pslld $20, %xmm1
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pslld $20, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_vec_shl_ext_shl0:
 ; AVX: # BB#0:
-; AVX-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX-NEXT: vpslld $20, %ymm0, %ymm0
 ; AVX-NEXT: retq
 %1 = shl <8 x i16> %x, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
diff --git a/llvm/test/CodeGen/X86/not-and-simplify.ll b/llvm/test/CodeGen/X86/not-and-simplify.ll
index 87aa10a6e29..8ecc859bead 100644
--- a/llvm/test/CodeGen/X86/not-and-simplify.ll
+++ b/llvm/test/CodeGen/X86/not-and-simplify.ll
@@ -47,9 +47,7 @@ define i8 @shrink_xor_constant2(i8 %x) {
 define <16 x i8> @shrink_xor_constant2_splat(<16 x i8> %x) {
 ; ALL-LABEL: shrink_xor_constant2_splat:
 ; ALL: # BB#0:
-; ALL-NEXT: psllw $5, %xmm0
-; ALL-NEXT: pand {{.*}}(%rip), %xmm0
-; ALL-NEXT: pandn {{.*}}(%rip), %xmm0
+; ALL-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
 ; ALL-NEXT: retq
 %sh = shl <16 x i8> %x, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
 %not = xor <16 x i8> %sh, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
diff --git a/llvm/test/CodeGen/X86/sse2-vector-shifts.ll b/llvm/test/CodeGen/X86/sse2-vector-shifts.ll
index d1c7adb6263..c2bb239639a 100644
--- a/llvm/test/CodeGen/X86/sse2-vector-shifts.ll
+++ b/llvm/test/CodeGen/X86/sse2-vector-shifts.ll
@@ -336,7 +336,6 @@ define <4 x i16> @sra_trunc_srl_v4i32(<4 x i32> %x) nounwind {
 define <4 x i32> @shl_zext_shl_v4i32(<4 x i16> %x) nounwind {
 ; CHECK-LABEL: shl_zext_shl_v4i32:
 ; CHECK: # BB#0:
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: pslld $19, %xmm0
 ; CHECK-NEXT: retq
 %shl0 = shl <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
diff --git a/llvm/test/CodeGen/X86/vector-blend.ll b/llvm/test/CodeGen/X86/vector-blend.ll
index 3eff8813726..03aebed9274 100644
--- a/llvm/test/CodeGen/X86/vector-blend.ll
+++ b/llvm/test/CodeGen/X86/vector-blend.ll
@@ -985,17 +985,15 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
 ; SSE41-LABEL: blend_neg_logic_v4i32_2:
 ; SSE41: # BB#0: # %entry
 ; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrad $31, %xmm1
 ; SSE41-NEXT: pxor %xmm3, %xmm3
 ; SSE41-NEXT: psubd %xmm2, %xmm3
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: movaps %xmm1, %xmm0
 ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3
 ; SSE41-NEXT: movaps %xmm3, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; AVX-LABEL: blend_neg_logic_v4i32_2:
 ; AVX: # BB#0: # %entry
-; AVX-NEXT: vpsrad $31, %xmm1, %xmm1
 ; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX-NEXT: vpsubd %xmm0, %xmm2, %xmm2
 ; AVX-NEXT: vblendvps %xmm1, %xmm0, %xmm2, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll
index 5fafecae23d..2d407290acc 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-128.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll
@@ -1559,13 +1559,8 @@ define <2 x i64> @splatconstant_rotate_mask_v2i64(<2 x i64> %a) nounwind {
 ;
 ; X32-SSE-LABEL: splatconstant_rotate_mask_v2i64:
 ; X32-SSE: # BB#0:
-; X32-SSE-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE-NEXT: psllq $15, %xmm1
 ; X32-SSE-NEXT: psrlq $49, %xmm0
 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: por %xmm0, %xmm1
-; X32-SSE-NEXT: movdqa %xmm1, %xmm0
 ; X32-SSE-NEXT: retl
 %shl = shl <2 x i64> %a, <i64 15, i64 15>
 %lshr = lshr <2 x i64> %a, <i64 49, i64 49>
@@ -1581,7 +1576,6 @@ define <4 x i32> @splatconstant_rotate_mask_v4i32(<4 x i32> %a) nounwind {
 ; SSE-NEXT: movdqa %xmm0, %xmm1
 ; SSE-NEXT: pslld $4, %xmm1
 ; SSE-NEXT: psrld $28, %xmm0
-; SSE-NEXT: pand {{.*}}(%rip), %xmm0
 ; SSE-NEXT: pand {{.*}}(%rip), %xmm1
 ; SSE-NEXT: por %xmm0, %xmm1
 ; SSE-NEXT: movdqa %xmm1, %xmm0
@@ -1591,7 +1585,6 @@ define <4 x i32> @splatconstant_rotate_mask_v4i32(<4 x i32> %a) nounwind {
 ; AVX: # BB#0:
 ; AVX-NEXT: vpslld $4, %xmm0, %xmm1
 ; AVX-NEXT: vpsrld $28, %xmm0, %xmm0
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX-NEXT: vpor %xmm0, %xmm1, %xmm0
 ; AVX-NEXT: retq
@@ -1621,7 +1614,6 @@ define <4 x i32> @splatconstant_rotate_mask_v4i32(<4 x i32> %a) nounwind {
 ; X32-SSE-NEXT: movdqa %xmm0, %xmm1
 ; X32-SSE-NEXT: pslld $4, %xmm1
 ; X32-SSE-NEXT: psrld $28, %xmm0
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm0
 ; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
 ; X32-SSE-NEXT: por %xmm0, %xmm1
 ; X32-SSE-NEXT: movdqa %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index 104c77084d9..3f67ea65b00 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -997,10 +997,10 @@ define <32 x i8> @splatconstant_rotate_v32i8(<32 x i8> %a) nounwind {
 define <4 x i64> @splatconstant_rotate_mask_v4i64(<4 x i64> %a) nounwind {
 ; AVX1-LABEL: splatconstant_rotate_mask_v4i64:
 ; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT: vpsrlq $49, %xmm0, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
 ; AVX1-NEXT: vpsrlq $49, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlq $49, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/widen_cast-4.ll b/llvm/test/CodeGen/X86/widen_cast-4.ll
index cc6fb27a629..5c352124725 100644
--- a/llvm/test/CodeGen/X86/widen_cast-4.ll
+++ b/llvm/test/CodeGen/X86/widen_cast-4.ll
@@ -26,7 +26,7 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
 ; NARROW-NEXT: psubw %xmm0, %xmm2
 ; NARROW-NEXT: psllw $8, %xmm2
 ; NARROW-NEXT: psraw $8, %xmm2
-; NARROW-NEXT: psraw $2, %xmm2
+; NARROW-NEXT: psrlw $2, %xmm2
 ; NARROW-NEXT: pshufb %xmm1, %xmm2
 ; NARROW-NEXT: movq %xmm2, (%edx,%eax,8)
 ; NARROW-NEXT: incl (%esp)
```

