Diffstat (limited to 'llvm/test/CodeGen/X86/vector-popcnt-128.ll')
-rw-r--r--  llvm/test/CodeGen/X86/vector-popcnt-128.ll | 112
1 file changed, 44 insertions(+), 68 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
index df42ebf2728..16539f1b2d4 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
@@ -14,18 +14,18 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-LABEL: testv2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlq $1, %xmm1
+; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: psubq %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
+; SSE2-NEXT: psubb %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: psrlq $2, %xmm0
+; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddq %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlq $4, %xmm1
-; SSE2-NEXT: paddq %xmm0, %xmm1
+; SSE2-NEXT: psrlw $4, %xmm1
+; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm0, %xmm1
@@ -35,18 +35,18 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE3-LABEL: testv2i64:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlq $1, %xmm1
+; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE3-NEXT: psubq %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
+; SSE3-NEXT: psubb %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
-; SSE3-NEXT: psrlq $2, %xmm0
+; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddq %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlq $4, %xmm1
-; SSE3-NEXT: paddq %xmm0, %xmm1
+; SSE3-NEXT: psrlw $4, %xmm1
+; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm0, %xmm1
@@ -128,28 +128,16 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
;
; BITALG_NOVLX-LABEL: testv2i64:
; BITALG_NOVLX: # %bb.0:
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm2
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; BITALG_NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; BITALG_NOVLX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv2i64:
; BITALG: # %bb.0:
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm2
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; BITALG-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
-; BITALG-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; BITALG-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT: retq
@@ -161,18 +149,18 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE2-LABEL: testv4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $1, %xmm1
+; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: psubd %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,858993459,858993459]
+; SSE2-NEXT: psubb %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: psrld $2, %xmm0
+; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddd %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $4, %xmm1
-; SSE2-NEXT: paddd %xmm0, %xmm1
+; SSE2-NEXT: psrlw $4, %xmm1
+; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
@@ -187,18 +175,18 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE3-LABEL: testv4i32:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrld $1, %xmm1
+; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE3-NEXT: psubd %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,858993459,858993459]
+; SSE3-NEXT: psubb %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
-; SSE3-NEXT: psrld $2, %xmm0
+; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddd %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrld $4, %xmm1
-; SSE3-NEXT: paddd %xmm0, %xmm1
+; SSE3-NEXT: psrlw $4, %xmm1
+; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: movdqa %xmm1, %xmm2
@@ -303,32 +291,20 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
;
; BITALG_NOVLX-LABEL: testv4i32:
; BITALG_NOVLX: # %bb.0:
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm2
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; BITALG_NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; BITALG_NOVLX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv4i32:
; BITALG: # %bb.0:
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm2
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; BITALG-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
-; BITALG-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; BITALG-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
@@ -346,16 +322,16 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: psubw %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE2-NEXT: psubb %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddw %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
-; SSE2-NEXT: paddw %xmm0, %xmm1
+; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psllw $8, %xmm0
@@ -368,16 +344,16 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE3-NEXT: psubw %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE3-NEXT: psubb %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddw %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
-; SSE3-NEXT: paddw %xmm0, %xmm1
+; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: psllw $8, %xmm0
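
For context: the updated SSE2/SSE3 check lines correspond to the classic byte-granular SWAR popcount (the psrlw/pand/psubb, pand-0x33/psrlw/paddb, and psrlw-4/paddb/pand-0x0f steps), after which psadbw against zero folds the per-byte counts into 64-bit lanes; the i32 and i16 cases repack with punpck/pmovzx or psllw instead. On AVX512 BITALG targets the same per-byte step is a single vpopcntb, which is why the vpshufb nibble-lookup sequence drops out of those check lines. Below is a minimal scalar sketch of the byte-wise algorithm; the function names are illustrative and not taken from the test.

#include <stdint.h>
#include <stdio.h>

/* Byte-granular SWAR popcount: scalar analogue of the new SSE2/SSE3 sequence.
 * After these three steps every byte holds its own population count, which the
 * vector code then sums with psadbw against an all-zero register. */
static inline uint64_t popcount_per_byte(uint64_t v) {
    v = v - ((v >> 1) & 0x5555555555555555ULL);                           /* 2-bit field sums  */
    v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); /* 4-bit field sums  */
    v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;                           /* per-byte counts   */
    return v;
}

/* Horizontal sum of the byte counts: the role psadbw plays for the i64 case. */
static inline unsigned popcount_u64(uint64_t v) {
    return (unsigned)((popcount_per_byte(v) * 0x0101010101010101ULL) >> 56);
}

int main(void) {
    printf("%u\n", popcount_u64(0xF0F0F0F0F0F0F0F0ULL)); /* prints 32 */
    return 0;
}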