Diffstat (limited to 'llvm/test/CodeGen/X86/vector-popcnt-128.ll')
-rw-r--r--  llvm/test/CodeGen/X86/vector-popcnt-128.ll  112
1 file changed, 44 insertions(+), 68 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
index df42ebf2728..16539f1b2d4 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
@@ -14,18 +14,18 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
 ; SSE2-LABEL: testv2i64:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlq $1, %xmm1
+; SSE2-NEXT: psrlw $1, %xmm1
 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: psubq %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
+; SSE2-NEXT: psubb %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE2-NEXT: movdqa %xmm0, %xmm2
 ; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: psrlq $2, %xmm0
+; SSE2-NEXT: psrlw $2, %xmm0
 ; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddq %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlq $4, %xmm1
-; SSE2-NEXT: paddq %xmm0, %xmm1
+; SSE2-NEXT: psrlw $4, %xmm1
+; SSE2-NEXT: paddb %xmm0, %xmm1
 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT: pxor %xmm0, %xmm0
 ; SSE2-NEXT: psadbw %xmm0, %xmm1
@@ -35,18 +35,18 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
 ; SSE3-LABEL: testv2i64:
 ; SSE3: # %bb.0:
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlq $1, %xmm1
+; SSE3-NEXT: psrlw $1, %xmm1
 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE3-NEXT: psubq %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
+; SSE3-NEXT: psubb %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE3-NEXT: movdqa %xmm0, %xmm2
 ; SSE3-NEXT: pand %xmm1, %xmm2
-; SSE3-NEXT: psrlq $2, %xmm0
+; SSE3-NEXT: psrlw $2, %xmm0
 ; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddq %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlq $4, %xmm1
-; SSE3-NEXT: paddq %xmm0, %xmm1
+; SSE3-NEXT: psrlw $4, %xmm1
+; SSE3-NEXT: paddb %xmm0, %xmm1
 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
 ; SSE3-NEXT: pxor %xmm0, %xmm0
 ; SSE3-NEXT: psadbw %xmm0, %xmm1
@@ -128,28 +128,16 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
 ;
 ; BITALG_NOVLX-LABEL: testv2i64:
 ; BITALG_NOVLX: # %bb.0:
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm2
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; BITALG_NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; BITALG_NOVLX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vzeroupper
 ; BITALG_NOVLX-NEXT: retq
 ;
 ; BITALG-LABEL: testv2i64:
 ; BITALG: # %bb.0:
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm2
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; BITALG-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
-; BITALG-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; BITALG-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG-NEXT: retq
@@ -161,18 +149,18 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
 ; SSE2-LABEL: testv4i32:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $1, %xmm1
+; SSE2-NEXT: psrlw $1, %xmm1
 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: psubd %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,858993459,858993459]
+; SSE2-NEXT: psubb %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE2-NEXT: movdqa %xmm0, %xmm2
 ; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: psrld $2, %xmm0
+; SSE2-NEXT: psrlw $2, %xmm0
 ; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddd %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $4, %xmm1
-; SSE2-NEXT: paddd %xmm0, %xmm1
+; SSE2-NEXT: psrlw $4, %xmm1
+; SSE2-NEXT: paddb %xmm0, %xmm1
 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT: pxor %xmm0, %xmm0
 ; SSE2-NEXT: movdqa %xmm1, %xmm2
@@ -187,18 +175,18 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
 ; SSE3-LABEL: testv4i32:
 ; SSE3: # %bb.0:
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrld $1, %xmm1
+; SSE3-NEXT: psrlw $1, %xmm1
 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE3-NEXT: psubd %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,858993459,858993459]
+; SSE3-NEXT: psubb %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE3-NEXT: movdqa %xmm0, %xmm2
 ; SSE3-NEXT: pand %xmm1, %xmm2
-; SSE3-NEXT: psrld $2, %xmm0
+; SSE3-NEXT: psrlw $2, %xmm0
 ; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddd %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrld $4, %xmm1
-; SSE3-NEXT: paddd %xmm0, %xmm1
+; SSE3-NEXT: psrlw $4, %xmm1
+; SSE3-NEXT: paddb %xmm0, %xmm1
 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
 ; SSE3-NEXT: pxor %xmm0, %xmm0
 ; SSE3-NEXT: movdqa %xmm1, %xmm2
@@ -303,32 +291,20 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
 ;
 ; BITALG_NOVLX-LABEL: testv4i32:
 ; BITALG_NOVLX: # %bb.0:
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm2
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; BITALG_NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; BITALG_NOVLX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
 ; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
 ; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
 ; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vzeroupper
 ; BITALG_NOVLX-NEXT: retq
 ;
 ; BITALG-LABEL: testv4i32:
 ; BITALG: # %bb.0:
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm2
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; BITALG-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
-; BITALG-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; BITALG-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
 ; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
@@ -346,16 +322,16 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlw $1, %xmm1
 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: psubw %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE2-NEXT: psubb %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE2-NEXT: movdqa %xmm0, %xmm2
 ; SSE2-NEXT: pand %xmm1, %xmm2
 ; SSE2-NEXT: psrlw $2, %xmm0
 ; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddw %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlw $4, %xmm1
-; SSE2-NEXT: paddw %xmm0, %xmm1
+; SSE2-NEXT: paddb %xmm0, %xmm1
 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT: movdqa %xmm1, %xmm0
 ; SSE2-NEXT: psllw $8, %xmm0
@@ -368,16 +344,16 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrlw $1, %xmm1
 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE3-NEXT: psubw %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE3-NEXT: psubb %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
 ; SSE3-NEXT: movdqa %xmm0, %xmm2
 ; SSE3-NEXT: pand %xmm1, %xmm2
 ; SSE3-NEXT: psrlw $2, %xmm0
 ; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddw %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrlw $4, %xmm1
-; SSE3-NEXT: paddw %xmm0, %xmm1
+; SSE3-NEXT: paddb %xmm0, %xmm1
 ; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
 ; SSE3-NEXT: movdqa %xmm1, %xmm0
 ; SSE3-NEXT: psllw $8, %xmm0
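What the updated checks capture: the v2i64/v4i32/v8i16 popcount lowerings now share one byte-wise bit-twiddling sequence (psrlw shifts with byte masks, psubb/paddb) instead of per-element-width arithmetic, and the element totals are recovered afterwards (psadbw for i64, unpack + psadbw + packuswb for i32, psllw/shift-add for i16). A minimal C sketch of the v2i64 path, written for illustration here (the function name is mine, not from LLVM):

#include <emmintrin.h>

/* Byte-wise SWAR popcount matching the SSE2 checks above. The 0x55/0x33/0x0f
 * masks also clear any bits that psrlw carries across a byte boundary, which
 * is why 16-bit shifts are safe on i8 lanes. */
static __m128i popcnt_v2i64_sse2(__m128i v) {
    const __m128i m55 = _mm_set1_epi8(0x55);
    const __m128i m33 = _mm_set1_epi8(0x33);
    const __m128i m0f = _mm_set1_epi8(0x0f);
    /* v -= (v >> 1) & 0x55: 2-bit partial counts */
    v = _mm_sub_epi8(v, _mm_and_si128(_mm_srli_epi16(v, 1), m55));
    /* fold into 4-bit partial counts */
    v = _mm_add_epi8(_mm_and_si128(v, m33),
                     _mm_and_si128(_mm_srli_epi16(v, 2), m33));
    /* fold into one count per byte */
    v = _mm_and_si128(_mm_add_epi8(v, _mm_srli_epi16(v, 4)), m0f);
    /* psadbw against zero sums the bytes of each 64-bit half */
    return _mm_sad_epu8(v, _mm_setzero_si128());
}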

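The BITALG run lines reduce to the same shape, except a single vpopcntb computes the per-byte counts directly, replacing the whole shift/mask sequence (the NOVLX variant widens to zmm first, hence the kill comment and the trailing vzeroupper). A hedged intrinsics equivalent, assuming AVX512BITALG plus AVX512VL:

#include <immintrin.h>

/* vpopcntb + vpsadbw form of the v2i64 popcount, mirroring the BITALG checks. */
static __m128i popcnt_v2i64_bitalg(__m128i v) {
    v = _mm_popcnt_epi8(v);                      /* vpopcntb */
    return _mm_sad_epu8(v, _mm_setzero_si128()); /* vpsadbw  */
}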
