| field | value | date |
|---|---|---|
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-10-29 11:29:39 +0000 |
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-10-29 11:29:39 +0000 |
| commit | 75a697a17ea3084fadb83737227f36c142a3c7bb (patch) | |
| tree | 839260a25f3b9a9eccd047e896534907b25c905a /llvm/test/CodeGen/X86 | |
| parent | 25eb42023355f0f575f496391715a69a441a8346 (diff) | |
[DAGCombiner] (REAPPLIED) Add vector demanded elements support to computeKnownBits
Currently computeKnownBits returns the known zero/one bits common to every element of a vector, even when we may only be interested in one or some of the elements.
This patch adds a DemandedElts argument that lets us specify which elements we actually care about. The original computeKnownBits entry point calls through with a DemandedElts mask that demands all elements, matching the current behaviour; scalar types set the mask width to 1.
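To make the shape of the change concrete, here is a minimal sketch of how a caller might build a DemandedElts mask for a single lane. This assumes the 2016-era SelectionDAG interface, where known bits were returned as separate KnownZero/KnownOne masks; the helper name and exact overload are illustrative, not copied from the patch.

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/CodeGen/SelectionDAG.h"

using namespace llvm;

// Sketch: query the known bits of a single vector lane of Op. With a
// one-hot DemandedElts mask, lanes we don't care about can no longer
// weaken the result.
static void knownBitsForLane(SelectionDAG &DAG, SDValue Op, unsigned Lane,
                             APInt &KnownZero, APInt &KnownOne) {
  EVT VT = Op.getValueType();
  // Scalars behave like one-element vectors: DemandedElts is 1 bit wide.
  unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
  APInt DemandedElts = APInt::getOneBitSet(NumElts, VT.isVector() ? Lane : 0);
  DAG.computeKnownBits(Op, KnownZero, KnownOne, DemandedElts);
}
```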
This approach proved easier than adding a full per-element known-bits solution, with similar usefulness for the combines where computeKnownBits is typically used.
I've only added support for a few opcodes so far (the ones that have proven straightforward to test); all others default to demanding all elements but can be updated in due course.
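As an illustration of what the per-opcode support boils down to (a hypothetical standalone helper, not code from the patch): once each source lane's known bits are available, only the demanded lanes take part in the intersection, so demanding fewer lanes can only leave more bits known.

```cpp
#include "llvm/ADT/APInt.h"
#include <vector>

using llvm::APInt;

// Intersect per-lane known-bit masks over just the demanded lanes.
// Starting from all-ones is the identity for the intersection; an
// undemanded lane simply never constrains the accumulated masks.
static void intersectDemandedLanes(const std::vector<APInt> &LaneKnownZero,
                                   const std::vector<APInt> &LaneKnownOne,
                                   const APInt &DemandedElts,
                                   unsigned BitWidth, APInt &KnownZero,
                                   APInt &KnownOne) {
  KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
  for (unsigned i = 0, e = LaneKnownZero.size(); i != e; ++i) {
    if (!DemandedElts[i])
      continue; // skip lanes nobody asked about
    KnownZero &= LaneKnownZero[i];
    KnownOne &= LaneKnownOne[i];
  }
}
```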
DemandedElts support could similarly be added to computeKnownBitsForTargetNode in a future commit.
It looked like this had caused compile-time regressions on some buildbots (and so it was reverted in rL285381), but it appears to have just been a harmless bystander!
Differential Revision: https://reviews.llvm.org/D25691
llvm-svn: 285494
Diffstat (limited to 'llvm/test/CodeGen/X86')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-vperm2x128.ll | 6 |
| -rw-r--r-- | llvm/test/CodeGen/X86/known-bits-vector.ll | 50 |

2 files changed, 13 insertions(+), 43 deletions(-)
```diff
diff --git a/llvm/test/CodeGen/X86/avx-vperm2x128.ll b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
index 740fd77d82e..f7f54a01d7f 100644
--- a/llvm/test/CodeGen/X86/avx-vperm2x128.ll
+++ b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
@@ -465,11 +465,9 @@ define <4 x i64> @shuffle_v4i64_67zz(<4 x i64> %a, <4 x i64> %b) {
 ; AVX1-LABEL: shuffle_v4i64_67zz:
 ; AVX1: ## BB#0:
 ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpaddq %xmm2, %xmm3, %xmm2
 ; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: shuffle_v4i64_67zz:
diff --git a/llvm/test/CodeGen/X86/known-bits-vector.ll b/llvm/test/CodeGen/X86/known-bits-vector.ll
index e124eda5b56..142fbedccd0 100644
--- a/llvm/test/CodeGen/X86/known-bits-vector.ll
+++ b/llvm/test/CodeGen/X86/known-bits-vector.ll
@@ -5,16 +5,14 @@
 define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
 ; X32-LABEL: knownbits_mask_extract_sext:
 ; X32: # BB#0:
-; X32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: cwtl
+; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT: vpextrw $0, %xmm0, %eax
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: knownbits_mask_extract_sext:
 ; X64: # BB#0:
-; X64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vmovd %xmm0, %eax
-; X64-NEXT: cwtl
+; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vpextrw $0, %xmm0, %eax
 ; X64-NEXT: retq
   %1 = and <8 x i16> %a0, <i16 15, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
   %2 = extractelement <8 x i16> %1, i32 0
@@ -31,17 +29,10 @@ define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
 ; X32-NEXT: subl $16, %esp
 ; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
-; X32-NEXT: vpextrd $1, %xmm0, %eax
 ; X32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: testl %eax, %eax
-; X32-NEXT: setns %cl
 ; X32-NEXT: fildll {{[0-9]+}}(%esp)
-; X32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
 ; X32-NEXT: fstps {{[0-9]+}}(%esp)
-; X32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT: vmovss %xmm0, (%esp)
-; X32-NEXT: flds (%esp)
+; X32-NEXT: flds {{[0-9]+}}(%esp)
 ; X32-NEXT: movl %ebp, %esp
 ; X32-NEXT: popl %ebp
 ; X32-NEXT: retl
@@ -51,18 +42,7 @@ define float @knownbits_mask_extract_uitofp(<2 x i64> %a0) nounwind {
 ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4,5,6,7]
 ; X64-NEXT: vmovq %xmm0, %rax
-; X64-NEXT: testq %rax, %rax
-; X64-NEXT: js .LBB1_1
-; X64-NEXT: # BB#2:
-; X64-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
-; X64-NEXT: retq
-; X64-NEXT: .LBB1_1:
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: shrq %rcx
-; X64-NEXT: andl $1, %eax
-; X64-NEXT: orq %rcx, %rax
 ; X64-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
-; X64-NEXT: vaddss %xmm0, %xmm0, %xmm0
 ; X64-NEXT: retq
   %1 = and <2 x i64> %a0, <i64 65535, i64 -1>
   %2 = extractelement <2 x i64> %1, i32 0
@@ -74,15 +54,15 @@ define <4 x i32> @knownbits_mask_shuffle_sext(<8 x i16> %a0) nounwind {
 ; X32-LABEL: knownbits_mask_shuffle_sext:
 ; X32: # BB#0:
 ; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X32-NEXT: vpmovsxwd %xmm0, %xmm0
+; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X32-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: knownbits_mask_shuffle_sext:
 ; X64: # BB#0:
 ; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-NEXT: vpmovsxwd %xmm0, %xmm0
+; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
 ; X64-NEXT: retq
   %1 = and <8 x i16> %a0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 15, i16 15, i16 15, i16 15>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -95,22 +75,14 @@ define <4 x float> @knownbits_mask_shuffle_uitofp(<4 x i32> %a0) nounwind {
 ; X32: # BB#0:
 ; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
 ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X32-NEXT: vpsrld $16, %xmm0, %xmm0
-; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; X32-NEXT: vcvtdq2ps %xmm0, %xmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: knownbits_mask_shuffle_uitofp:
 ; X64: # BB#0:
 ; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X64-NEXT: vpsrld $16, %xmm0, %xmm0
-; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7]
-; X64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; X64-NEXT: vcvtdq2ps %xmm0, %xmm0
 ; X64-NEXT: retq
   %1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
```
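The knownbits_mask_extract_sext change above shows the payoff: only lane 0 of the AND is demanded, and its mask of 15 proves the top bit of the extracted i16 is zero, so the sign extension is a no-op and the vmovd+cwtl pair collapses into a single vpextrw. A tiny standalone check of that reasoning (illustrative C++, unrelated to the LLVM sources):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t v = 0; v <= 0xFFFF; ++v) {
    // Lane 0 is ANDed with 15, so only its low 4 bits can be set and the
    // sign bit of the i16 is known zero.
    uint16_t lane0 = static_cast<uint16_t>(v) & 15;
    int32_t sext = static_cast<int16_t>(lane0); // sign-extend, as cwtl would
    int32_t zext = lane0;                       // plain zero-extension
    assert(sext == zext); // identical whenever the top bit is known zero
  }
  return 0;
}
```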

