Diffstat (limited to 'llvm/test/CodeGen/X86')

-rw-r--r--  llvm/test/CodeGen/X86/vec_ctbits.ll          84
-rw-r--r--  llvm/test/CodeGen/X86/vector-lzcnt-128.ll   144
-rw-r--r--  llvm/test/CodeGen/X86/vector-popcnt-128.ll  112
-rw-r--r--  llvm/test/CodeGen/X86/vector-popcnt-256.ll   38
-rw-r--r--  llvm/test/CodeGen/X86/vector-popcnt-512.ll   18
-rw-r--r--  llvm/test/CodeGen/X86/vector-tzcnt-128.ll   220
-rw-r--r--  llvm/test/CodeGen/X86/vector-tzcnt-256.ll    72
-rw-r--r--  llvm/test/CodeGen/X86/vector-tzcnt-512.ll    36

8 files changed, 262 insertions(+), 462 deletions(-)
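Editor's note: the regenerated CHECK lines below all reflect one change. The bit-parallel popcount steps now run on i8 lanes (psrlw through a byte mask, psubb/paddb, and 0x33 = 51 byte constants) instead of on the element width of the source vector (psrlq/psubq/paddq with wide constants such as 3689348814741910323 = 0x3333333333333333), before the existing psadbw horizontal sum. As a reading aid, here is a minimal scalar C sketch of the identity those byte-wise steps encode; the helper names are mine, not part of the commit:

    #include <stdint.h>

    /* Bit-parallel popcount confined to byte lanes: the scalar analogue
     * of the psrlw/pand/psubb/paddb sequence in the regenerated checks.
     * 0x33...33 is the [51,51,...] constant that replaces the old
     * element-width masks.  After these three steps every byte of v
     * holds its own population count. */
    static uint64_t popcount_per_byte(uint64_t v) {
        v = v - ((v >> 1) & 0x5555555555555555ULL);
        v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
        v = (v + (v >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
        return v;
    }

    /* psadbw-style reduction: sum the eight byte counts into one total.
     * (psadbw against zero is exactly this horizontal byte add.) */
    static unsigned popcount_u64(uint64_t v) {
        return (unsigned)((popcount_per_byte(v) * 0x0101010101010101ULL) >> 56);
    }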
diff --git a/llvm/test/CodeGen/X86/vec_ctbits.ll b/llvm/test/CodeGen/X86/vec_ctbits.ll
index 781c61b5789..978a40cbb26 100644
--- a/llvm/test/CodeGen/X86/vec_ctbits.ll
+++ b/llvm/test/CodeGen/X86/vec_ctbits.ll
@@ -15,18 +15,18 @@ define <2 x i64> @footz(<2 x i64> %a) nounwind {
; CHECK-NEXT: pcmpeqd %xmm3, %xmm3
; CHECK-NEXT: paddq %xmm2, %xmm3
; CHECK-NEXT: movdqa %xmm3, %xmm0
-; CHECK-NEXT: psrlq $1, %xmm0
+; CHECK-NEXT: psrlw $1, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: psubq %xmm0, %xmm3
-; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; CHECK-NEXT: psubb %xmm0, %xmm3
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm3, %xmm2
; CHECK-NEXT: pand %xmm0, %xmm2
-; CHECK-NEXT: psrlq $2, %xmm3
+; CHECK-NEXT: psrlw $2, %xmm3
; CHECK-NEXT: pand %xmm0, %xmm3
-; CHECK-NEXT: paddq %xmm2, %xmm3
+; CHECK-NEXT: paddb %xmm2, %xmm3
; CHECK-NEXT: movdqa %xmm3, %xmm0
-; CHECK-NEXT: psrlq $4, %xmm0
-; CHECK-NEXT: paddq %xmm3, %xmm0
+; CHECK-NEXT: psrlw $4, %xmm0
+; CHECK-NEXT: paddb %xmm3, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: psadbw %xmm1, %xmm0
; CHECK-NEXT: retq
@@ -58,18 +58,18 @@ define <2 x i64> @foolz(<2 x i64> %a) nounwind {
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pxor %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
-; CHECK-NEXT: psrlq $1, %xmm0
+; CHECK-NEXT: psrlw $1, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: psubq %xmm0, %xmm1
-; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; CHECK-NEXT: psubb %xmm0, %xmm1
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm1, %xmm2
; CHECK-NEXT: pand %xmm0, %xmm2
-; CHECK-NEXT: psrlq $2, %xmm1
+; CHECK-NEXT: psrlw $2, %xmm1
; CHECK-NEXT: pand %xmm0, %xmm1
-; CHECK-NEXT: paddq %xmm2, %xmm1
+; CHECK-NEXT: paddb %xmm2, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm2
-; CHECK-NEXT: psrlq $4, %xmm2
-; CHECK-NEXT: paddq %xmm1, %xmm2
+; CHECK-NEXT: psrlw $4, %xmm2
+; CHECK-NEXT: paddb %xmm1, %xmm2
; CHECK-NEXT: pand {{.*}}(%rip), %xmm2
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: psadbw %xmm2, %xmm0
@@ -83,18 +83,18 @@ define <2 x i64> @foopop(<2 x i64> %a) nounwind {
; CHECK-LABEL: foopop:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlq $1, %xmm1
+; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
-; CHECK-NEXT: psubq %xmm1, %xmm0
-; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
+; CHECK-NEXT: psubb %xmm1, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: pand %xmm1, %xmm2
-; CHECK-NEXT: psrlq $2, %xmm0
+; CHECK-NEXT: psrlw $2, %xmm0
; CHECK-NEXT: pand %xmm1, %xmm0
-; CHECK-NEXT: paddq %xmm2, %xmm0
+; CHECK-NEXT: paddb %xmm2, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlq $4, %xmm1
-; CHECK-NEXT: paddq %xmm0, %xmm1
+; CHECK-NEXT: psrlw $4, %xmm1
+; CHECK-NEXT: paddb %xmm0, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
; CHECK-NEXT: pxor %xmm0, %xmm0
; CHECK-NEXT: psadbw %xmm0, %xmm1
@@ -119,18 +119,18 @@ define <2 x i32> @promtz(<2 x i32> %a) nounwind {
; CHECK-NEXT: pcmpeqd %xmm3, %xmm3
; CHECK-NEXT: paddq %xmm2, %xmm3
; CHECK-NEXT: movdqa %xmm3, %xmm0
-; CHECK-NEXT: psrlq $1, %xmm0
+; CHECK-NEXT: psrlw $1, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: psubq %xmm0, %xmm3
-; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; CHECK-NEXT: psubb %xmm0, %xmm3
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm3, %xmm2
; CHECK-NEXT: pand %xmm0, %xmm2
-; CHECK-NEXT: psrlq $2, %xmm3
+; CHECK-NEXT: psrlw $2, %xmm3
; CHECK-NEXT: pand %xmm0, %xmm3
-; CHECK-NEXT: paddq %xmm2, %xmm3
+; CHECK-NEXT: paddb %xmm2, %xmm3
; CHECK-NEXT: movdqa %xmm3, %xmm0
-; CHECK-NEXT: psrlq $4, %xmm0
-; CHECK-NEXT: paddq %xmm3, %xmm0
+; CHECK-NEXT: psrlw $4, %xmm0
+; CHECK-NEXT: paddb %xmm3, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: psadbw %xmm1, %xmm0
; CHECK-NEXT: retq
@@ -164,18 +164,18 @@ define <2 x i32> @promlz(<2 x i32> %a) nounwind {
; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
; CHECK-NEXT: pxor %xmm0, %xmm2
; CHECK-NEXT: movdqa %xmm2, %xmm0
-; CHECK-NEXT: psrlq $1, %xmm0
+; CHECK-NEXT: psrlw $1, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: psubq %xmm0, %xmm2
-; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; CHECK-NEXT: psubb %xmm0, %xmm2
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm2, %xmm3
; CHECK-NEXT: pand %xmm0, %xmm3
-; CHECK-NEXT: psrlq $2, %xmm2
+; CHECK-NEXT: psrlw $2, %xmm2
; CHECK-NEXT: pand %xmm0, %xmm2
-; CHECK-NEXT: paddq %xmm3, %xmm2
+; CHECK-NEXT: paddb %xmm3, %xmm2
; CHECK-NEXT: movdqa %xmm2, %xmm0
-; CHECK-NEXT: psrlq $4, %xmm0
-; CHECK-NEXT: paddq %xmm2, %xmm0
+; CHECK-NEXT: psrlw $4, %xmm0
+; CHECK-NEXT: paddb %xmm2, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: psadbw %xmm1, %xmm0
; CHECK-NEXT: psubq {{.*}}(%rip), %xmm0
@@ -191,18 +191,18 @@ define <2 x i32> @prompop(<2 x i32> %a) nounwind {
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: pxor %xmm2, %xmm2
; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlq $1, %xmm1
+; CHECK-NEXT: psrlw $1, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
-; CHECK-NEXT: psubq %xmm1, %xmm0
-; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
+; CHECK-NEXT: psubb %xmm1, %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; CHECK-NEXT: movdqa %xmm0, %xmm3
; CHECK-NEXT: pand %xmm1, %xmm3
-; CHECK-NEXT: psrlq $2, %xmm0
+; CHECK-NEXT: psrlw $2, %xmm0
; CHECK-NEXT: pand %xmm1, %xmm0
-; CHECK-NEXT: paddq %xmm3, %xmm0
+; CHECK-NEXT: paddb %xmm3, %xmm0
; CHECK-NEXT: movdqa %xmm0, %xmm1
-; CHECK-NEXT: psrlq $4, %xmm1
-; CHECK-NEXT: paddq %xmm0, %xmm1
+; CHECK-NEXT: psrlw $4, %xmm1
+; CHECK-NEXT: paddb %xmm0, %xmm1
; CHECK-NEXT: pand {{.*}}(%rip), %xmm1
; CHECK-NEXT: psadbw %xmm2, %xmm1
; CHECK-NEXT: movdqa %xmm1, %xmm0
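Editor's note: the lzcnt hunks that follow start mid-sequence; the visible pcmpeqd/pxor pair is a vector NOT, which matches the usual lowering of ctlz via popcount: smear the leading one bit downward, invert, and count the remaining ones. A hedged C sketch of that identity, reusing popcount_u64 from the sketch above:

    #include <stdint.h>

    /* ctlz via popcount: smear the highest set bit into every lower
     * position, invert (the pcmpeqd+pxor pair in the hunks below is a
     * vector NOT), and count the ones above the original leading bit.
     * Returns 64 for x == 0. */
    static unsigned ctlz_u64(uint64_t x) {
        x |= x >> 1;
        x |= x >> 2;
        x |= x >> 4;
        x |= x >> 8;
        x |= x >> 16;
        x |= x >> 32;
        return popcount_u64(~x);
    }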
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
index dc945c84b19..34ea33d576c 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
@@ -37,18 +37,18 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlq $1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: psubq %xmm0, %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: psrlq $2, %xmm1
+; SSE2-NEXT: psrlw $2, %xmm1
; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: paddq %xmm2, %xmm1
+; SSE2-NEXT: paddb %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrlq $4, %xmm2
-; SSE2-NEXT: paddq %xmm1, %xmm2
+; SSE2-NEXT: psrlw $4, %xmm2
+; SSE2-NEXT: paddb %xmm1, %xmm2
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm2, %xmm0
@@ -77,18 +77,18 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
-; SSE3-NEXT: psrlq $1, %xmm0
+; SSE3-NEXT: psrlw $1, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE3-NEXT: psubq %xmm0, %xmm1
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; SSE3-NEXT: psubb %xmm0, %xmm1
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm1, %xmm2
; SSE3-NEXT: pand %xmm0, %xmm2
-; SSE3-NEXT: psrlq $2, %xmm1
+; SSE3-NEXT: psrlw $2, %xmm1
; SSE3-NEXT: pand %xmm0, %xmm1
-; SSE3-NEXT: paddq %xmm2, %xmm1
+; SSE3-NEXT: paddb %xmm2, %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm2
-; SSE3-NEXT: psrlq $4, %xmm2
-; SSE3-NEXT: paddq %xmm1, %xmm2
+; SSE3-NEXT: psrlw $4, %xmm2
+; SSE3-NEXT: paddb %xmm1, %xmm2
; SSE3-NEXT: pand {{.*}}(%rip), %xmm2
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm2, %xmm0
@@ -303,18 +303,18 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrlq $1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: psubq %xmm0, %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: psrlq $2, %xmm1
+; SSE2-NEXT: psrlw $2, %xmm1
; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: paddq %xmm2, %xmm1
+; SSE2-NEXT: paddb %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrlq $4, %xmm2
-; SSE2-NEXT: paddq %xmm1, %xmm2
+; SSE2-NEXT: psrlw $4, %xmm2
+; SSE2-NEXT: paddb %xmm1, %xmm2
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm2, %xmm0
@@ -343,18 +343,18 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
-; SSE3-NEXT: psrlq $1, %xmm0
+; SSE3-NEXT: psrlw $1, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE3-NEXT: psubq %xmm0, %xmm1
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; SSE3-NEXT: psubb %xmm0, %xmm1
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm1, %xmm2
; SSE3-NEXT: pand %xmm0, %xmm2
-; SSE3-NEXT: psrlq $2, %xmm1
+; SSE3-NEXT: psrlw $2, %xmm1
; SSE3-NEXT: pand %xmm0, %xmm1
-; SSE3-NEXT: paddq %xmm2, %xmm1
+; SSE3-NEXT: paddb %xmm2, %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm2
-; SSE3-NEXT: psrlq $4, %xmm2
-; SSE3-NEXT: paddq %xmm1, %xmm2
+; SSE3-NEXT: psrlw $4, %xmm2
+; SSE3-NEXT: paddb %xmm1, %xmm2
; SSE3-NEXT: pand {{.*}}(%rip), %xmm2
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm2, %xmm0
@@ -566,18 +566,18 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: psrld $1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: psubd %xmm0, %xmm2
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
+; SSE2-NEXT: psubb %xmm0, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: psrld $2, %xmm2
+; SSE2-NEXT: psrlw $2, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: paddd %xmm1, %xmm2
+; SSE2-NEXT: paddb %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: psrld $4, %xmm0
-; SSE2-NEXT: paddd %xmm2, %xmm0
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
@@ -608,18 +608,18 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE3-NEXT: pxor %xmm1, %xmm2
; SSE3-NEXT: movdqa %xmm2, %xmm0
-; SSE3-NEXT: psrld $1, %xmm0
+; SSE3-NEXT: psrlw $1, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE3-NEXT: psubd %xmm0, %xmm2
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
+; SSE3-NEXT: psubb %xmm0, %xmm2
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm2, %xmm1
; SSE3-NEXT: pand %xmm0, %xmm1
-; SSE3-NEXT: psrld $2, %xmm2
+; SSE3-NEXT: psrlw $2, %xmm2
; SSE3-NEXT: pand %xmm0, %xmm2
-; SSE3-NEXT: paddd %xmm1, %xmm2
+; SSE3-NEXT: paddb %xmm1, %xmm2
; SSE3-NEXT: movdqa %xmm2, %xmm0
-; SSE3-NEXT: psrld $4, %xmm0
-; SSE3-NEXT: paddd %xmm2, %xmm0
+; SSE3-NEXT: psrlw $4, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSE3-NEXT: pxor %xmm1, %xmm1
; SSE3-NEXT: movdqa %xmm0, %xmm2
@@ -808,18 +808,18 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: psrld $1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: psubd %xmm0, %xmm2
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
+; SSE2-NEXT: psubb %xmm0, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: psrld $2, %xmm2
+; SSE2-NEXT: psrlw $2, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: paddd %xmm1, %xmm2
+; SSE2-NEXT: paddb %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: psrld $4, %xmm0
-; SSE2-NEXT: paddd %xmm2, %xmm0
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: movdqa %xmm0, %xmm2
@@ -850,18 +850,18 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
; SSE3-NEXT: pxor %xmm1, %xmm2
; SSE3-NEXT: movdqa %xmm2, %xmm0
-; SSE3-NEXT: psrld $1, %xmm0
+; SSE3-NEXT: psrlw $1, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE3-NEXT: psubd %xmm0, %xmm2
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
+; SSE3-NEXT: psubb %xmm0, %xmm2
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm2, %xmm1
; SSE3-NEXT: pand %xmm0, %xmm1
-; SSE3-NEXT: psrld $2, %xmm2
+; SSE3-NEXT: psrlw $2, %xmm2
; SSE3-NEXT: pand %xmm0, %xmm2
-; SSE3-NEXT: paddd %xmm1, %xmm2
+; SSE3-NEXT: paddb %xmm1, %xmm2
; SSE3-NEXT: movdqa %xmm2, %xmm0
-; SSE3-NEXT: psrld $4, %xmm0
-; SSE3-NEXT: paddd %xmm2, %xmm0
+; SSE3-NEXT: psrlw $4, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSE3-NEXT: pxor %xmm1, %xmm1
; SSE3-NEXT: movdqa %xmm0, %xmm2
@@ -1049,16 +1049,16 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psrlw $1, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: psubw %xmm0, %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: psrlw $2, %xmm1
; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: paddw %xmm2, %xmm1
+; SSE2-NEXT: paddb %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrlw $4, %xmm2
-; SSE2-NEXT: paddw %xmm1, %xmm2
+; SSE2-NEXT: paddb %xmm1, %xmm2
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: psllw $8, %xmm0
@@ -1085,16 +1085,16 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: psrlw $1, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE3-NEXT: psubw %xmm0, %xmm1
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE3-NEXT: psubb %xmm0, %xmm1
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm1, %xmm2
; SSE3-NEXT: pand %xmm0, %xmm2
; SSE3-NEXT: psrlw $2, %xmm1
; SSE3-NEXT: pand %xmm0, %xmm1
-; SSE3-NEXT: paddw %xmm2, %xmm1
+; SSE3-NEXT: paddb %xmm2, %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm2
; SSE3-NEXT: psrlw $4, %xmm2
-; SSE3-NEXT: paddw %xmm1, %xmm2
+; SSE3-NEXT: paddb %xmm1, %xmm2
; SSE3-NEXT: pand {{.*}}(%rip), %xmm2
; SSE3-NEXT: movdqa %xmm2, %xmm0
; SSE3-NEXT: psllw $8, %xmm0
@@ -1255,16 +1255,16 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psrlw $1, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: psubw %xmm0, %xmm1
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE2-NEXT: psubb %xmm0, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
; SSE2-NEXT: psrlw $2, %xmm1
; SSE2-NEXT: pand %xmm0, %xmm1
-; SSE2-NEXT: paddw %xmm2, %xmm1
+; SSE2-NEXT: paddb %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrlw $4, %xmm2
-; SSE2-NEXT: paddw %xmm1, %xmm2
+; SSE2-NEXT: paddb %xmm1, %xmm2
; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: psllw $8, %xmm0
@@ -1291,16 +1291,16 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: psrlw $1, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE3-NEXT: psubw %xmm0, %xmm1
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE3-NEXT: psubb %xmm0, %xmm1
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm1, %xmm2
; SSE3-NEXT: pand %xmm0, %xmm2
; SSE3-NEXT: psrlw $2, %xmm1
; SSE3-NEXT: pand %xmm0, %xmm1
-; SSE3-NEXT: paddw %xmm2, %xmm1
+; SSE3-NEXT: paddb %xmm2, %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm2
; SSE3-NEXT: psrlw $4, %xmm2
-; SSE3-NEXT: paddw %xmm1, %xmm2
+; SSE3-NEXT: paddb %xmm1, %xmm2
; SSE3-NEXT: pand {{.*}}(%rip), %xmm2
; SSE3-NEXT: movdqa %xmm2, %xmm0
; SSE3-NEXT: psllw $8, %xmm0
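Editor's note: the popcnt and tzcnt files below also regenerate their BITALG check lines. There the eight-instruction nibble-LUT sequence (vpand/vpshufb/vpsrlw/vpaddb) collapses to a single vpopcntb, and on targets without AVX512VL the xmm/ymm input is widened to zmm (hence the "# kill" comment and trailing vzeroupper in the BITALG_NOVLX checks). A C intrinsics sketch of the new shape — my illustration of the pattern, not code from the commit:

    #include <immintrin.h>

    /* AVX512BITALG form of the per-i64 popcount, as the new BITALG
     * check lines express it: one vpopcntb for per-byte counts, one
     * vpsadbw against zero to sum each group of eight bytes.
     * Without AVX512VL only the 512-bit vpopcntb exists, which is why
     * the NOVLX checks widen narrower vectors to zmm. */
    __m512i popcnt_epi64_bitalg(__m512i v) {
        __m512i bytes = _mm512_popcnt_epi8(v);                    /* vpopcntb */
        return _mm512_sad_epu8(bytes, _mm512_setzero_si512());    /* vpsadbw  */
    }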
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
index df42ebf2728..16539f1b2d4 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll
@@ -14,18 +14,18 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-LABEL: testv2i64:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlq $1, %xmm1
+; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: psubq %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
+; SSE2-NEXT: psubb %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: psrlq $2, %xmm0
+; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddq %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrlq $4, %xmm1
-; SSE2-NEXT: paddq %xmm0, %xmm1
+; SSE2-NEXT: psrlw $4, %xmm1
+; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: psadbw %xmm0, %xmm1
@@ -35,18 +35,18 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE3-LABEL: testv2i64:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlq $1, %xmm1
+; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE3-NEXT: psubq %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [3689348814741910323,3689348814741910323]
+; SSE3-NEXT: psubb %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
-; SSE3-NEXT: psrlq $2, %xmm0
+; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddq %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrlq $4, %xmm1
-; SSE3-NEXT: paddq %xmm0, %xmm1
+; SSE3-NEXT: psrlw $4, %xmm1
+; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: psadbw %xmm0, %xmm1
@@ -128,28 +128,16 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
;
; BITALG_NOVLX-LABEL: testv2i64:
; BITALG_NOVLX: # %bb.0:
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm2
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; BITALG_NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; BITALG_NOVLX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv2i64:
; BITALG: # %bb.0:
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm2
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; BITALG-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
-; BITALG-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; BITALG-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT: retq
@@ -161,18 +149,18 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE2-LABEL: testv4i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $1, %xmm1
+; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: psubd %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,858993459,858993459]
+; SSE2-NEXT: psubb %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
-; SSE2-NEXT: psrld $2, %xmm0
+; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddd %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrld $4, %xmm1
-; SSE2-NEXT: paddd %xmm0, %xmm1
+; SSE2-NEXT: psrlw $4, %xmm1
+; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
@@ -187,18 +175,18 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE3-LABEL: testv4i32:
; SSE3: # %bb.0:
; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrld $1, %xmm1
+; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE3-NEXT: psubd %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,858993459,858993459]
+; SSE3-NEXT: psubb %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
-; SSE3-NEXT: psrld $2, %xmm0
+; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddd %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
-; SSE3-NEXT: psrld $4, %xmm1
-; SSE3-NEXT: paddd %xmm0, %xmm1
+; SSE3-NEXT: psrlw $4, %xmm1
+; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: pxor %xmm0, %xmm0
; SSE3-NEXT: movdqa %xmm1, %xmm2
@@ -303,32 +291,20 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
;
; BITALG_NOVLX-LABEL: testv4i32:
; BITALG_NOVLX: # %bb.0:
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm2
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; BITALG_NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; BITALG_NOVLX-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv4i32:
; BITALG: # %bb.0:
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm2
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %xmm2, %xmm3, %xmm2
-; BITALG-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG-NEXT: vpand %xmm1, %xmm0, %xmm0
-; BITALG-NEXT: vpshufb %xmm0, %xmm3, %xmm0
-; BITALG-NEXT: vpaddb %xmm2, %xmm0, %xmm0
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
@@ -346,16 +322,16 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: psubw %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE2-NEXT: psubb %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddw %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
-; SSE2-NEXT: paddw %xmm0, %xmm1
+; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psllw $8, %xmm0
@@ -368,16 +344,16 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE3-NEXT: psubw %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE3-NEXT: psubb %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddw %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
-; SSE3-NEXT: paddw %xmm0, %xmm1
+; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: psllw $8, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-256.ll b/llvm/test/CodeGen/X86/vector-popcnt-256.ll
index b2cc2f1ebed..570f59673d1 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-256.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-256.ll
@@ -58,28 +58,15 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
;
; BITALG_NOVLX-LABEL: testv4i64:
; BITALG_NOVLX: # %bb.0:
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm2
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
-; BITALG_NOVLX-NEXT: vpsrlw $4, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
-; BITALG_NOVLX-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv4i64:
; BITALG: # %bb.0:
-; BITALG-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm2
-; BITALG-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %ymm2, %ymm3, %ymm2
-; BITALG-NEXT: vpsrlw $4, %ymm0, %ymm0
-; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0
-; BITALG-NEXT: vpshufb %ymm0, %ymm3, %ymm0
-; BITALG-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; BITALG-NEXT: vpopcntb %ymm0, %ymm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG-NEXT: retq
@@ -151,14 +138,8 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
;
; BITALG_NOVLX-LABEL: testv8i32:
; BITALG_NOVLX: # %bb.0:
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm2
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %ymm2, %ymm3, %ymm2
-; BITALG_NOVLX-NEXT: vpsrlw $4, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vpand %ymm1, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vpshufb %ymm0, %ymm3, %ymm0
-; BITALG_NOVLX-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; BITALG_NOVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
@@ -169,14 +150,7 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
;
; BITALG-LABEL: testv8i32:
; BITALG: # %bb.0:
-; BITALG-NEXT: vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm2
-; BITALG-NEXT: vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %ymm2, %ymm3, %ymm2
-; BITALG-NEXT: vpsrlw $4, %ymm0, %ymm0
-; BITALG-NEXT: vpand %ymm1, %ymm0, %ymm0
-; BITALG-NEXT: vpshufb %ymm0, %ymm3, %ymm0
-; BITALG-NEXT: vpaddb %ymm2, %ymm0, %ymm0
+; BITALG-NEXT: vpopcntb %ymm0, %ymm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
diff --git a/llvm/test/CodeGen/X86/vector-popcnt-512.ll b/llvm/test/CodeGen/X86/vector-popcnt-512.ll
index df5edc13c3e..eae9e6c79bd 100644
--- a/llvm/test/CodeGen/X86/vector-popcnt-512.ll
+++ b/llvm/test/CodeGen/X86/vector-popcnt-512.ll
@@ -50,14 +50,7 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
;
; BITALG-LABEL: testv8i64:
; BITALG: # %bb.0:
-; BITALG-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpandq %zmm1, %zmm0, %zmm2
-; BITALG-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %zmm2, %zmm3, %zmm2
-; BITALG-NEXT: vpsrlw $4, %zmm0, %zmm0
-; BITALG-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; BITALG-NEXT: vpshufb %zmm0, %zmm3, %zmm0
-; BITALG-NEXT: vpaddb %zmm2, %zmm0, %zmm0
+; BITALG-NEXT: vpopcntb %zmm0, %zmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; BITALG-NEXT: retq
@@ -122,14 +115,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
;
; BITALG-LABEL: testv16i32:
; BITALG: # %bb.0:
-; BITALG-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpandq %zmm1, %zmm0, %zmm2
-; BITALG-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %zmm2, %zmm3, %zmm2
-; BITALG-NEXT: vpsrlw $4, %zmm0, %zmm0
-; BITALG-NEXT: vpandq %zmm1, %zmm0, %zmm0
-; BITALG-NEXT: vpshufb %zmm0, %zmm3, %zmm0
-; BITALG-NEXT: vpaddb %zmm2, %zmm0, %zmm0
+; BITALG-NEXT: vpopcntb %zmm0, %zmm0
; BITALG-NEXT: vpxor %xmm1, %xmm1, %xmm1
; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
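Editor's note: next come the cttz tests. Their common preamble (the all-ones register from pcmpeqd/vpternlogd added to the masked input, i.e. x + (-1)) is the vector form of the identity cttz(x) = popcount((x & -x) - 1): isolate the lowest set bit, subtract one, and count the trailing-ones mask. A scalar C sketch, again reusing popcount_u64 from the first sketch; returns 64 for x == 0:

    #include <stdint.h>

    /* cttz via popcount: (x & -x) isolates the lowest set bit;
     * subtracting one turns it into a mask of the trailing zeros,
     * whose popcount is the answer.  For x == 0 the mask is all ones,
     * giving 64. */
    static unsigned cttz_u64(uint64_t x) {
        return popcount_u64((x & (~x + 1)) - 1);
    }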
diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll
index d19c10d68bc..a532794f89d 100644
--- a/llvm/test/CodeGen/X86/vector-tzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-tzcnt-128.ll
@@ -25,18 +25,18 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
; SSE2-NEXT: paddq %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrlq $1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: psubq %xmm0, %xmm3
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; SSE2-NEXT: psubb %xmm0, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: psrlq $2, %xmm3
+; SSE2-NEXT: psrlw $2, %xmm3
; SSE2-NEXT: pand %xmm0, %xmm3
-; SSE2-NEXT: paddq %xmm2, %xmm3
+; SSE2-NEXT: paddb %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrlq $4, %xmm0
-; SSE2-NEXT: paddq %xmm3, %xmm0
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: paddb %xmm3, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: retq
@@ -50,18 +50,18 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; SSE3-NEXT: pcmpeqd %xmm3, %xmm3
; SSE3-NEXT: paddq %xmm2, %xmm3
; SSE3-NEXT: movdqa %xmm3, %xmm0
-; SSE3-NEXT: psrlq $1, %xmm0
+; SSE3-NEXT: psrlw $1, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE3-NEXT: psubq %xmm0, %xmm3
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; SSE3-NEXT: psubb %xmm0, %xmm3
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm3, %xmm2
; SSE3-NEXT: pand %xmm0, %xmm2
-; SSE3-NEXT: psrlq $2, %xmm3
+; SSE3-NEXT: psrlw $2, %xmm3
; SSE3-NEXT: pand %xmm0, %xmm3
-; SSE3-NEXT: paddq %xmm2, %xmm3
+; SSE3-NEXT: paddb %xmm2, %xmm3
; SSE3-NEXT: movdqa %xmm3, %xmm0
-; SSE3-NEXT: psrlq $4, %xmm0
-; SSE3-NEXT: paddq %xmm3, %xmm0
+; SSE3-NEXT: psrlw $4, %xmm0
+; SSE3-NEXT: paddb %xmm3, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: retq
@@ -155,15 +155,9 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; BITALG_NOVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; BITALG_NOVLX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %xmm2, %xmm0, %xmm3
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %xmm3, %xmm4, %xmm3
-; BITALG_NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpshufb %xmm0, %xmm4, %xmm0
-; BITALG_NOVLX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv2i64:
@@ -173,14 +167,7 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
; BITALG-NEXT: vpand %xmm2, %xmm0, %xmm0
; BITALG-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; BITALG-NEXT: vpaddq %xmm2, %xmm0, %xmm0
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %xmm2, %xmm0, %xmm3
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %xmm3, %xmm4, %xmm3
-; BITALG-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG-NEXT: vpand %xmm2, %xmm0, %xmm0
-; BITALG-NEXT: vpshufb %xmm0, %xmm4, %xmm0
-; BITALG-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT: retq
;
@@ -217,18 +204,18 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
; SSE2-NEXT: paddq %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrlq $1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: psubq %xmm0, %xmm3
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; SSE2-NEXT: psubb %xmm0, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: psrlq $2, %xmm3
+; SSE2-NEXT: psrlw $2, %xmm3
; SSE2-NEXT: pand %xmm0, %xmm3
-; SSE2-NEXT: paddq %xmm2, %xmm3
+; SSE2-NEXT: paddb %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrlq $4, %xmm0
-; SSE2-NEXT: paddq %xmm3, %xmm0
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: paddb %xmm3, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: psadbw %xmm1, %xmm0
; SSE2-NEXT: retq
@@ -242,18 +229,18 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; SSE3-NEXT: pcmpeqd %xmm3, %xmm3
; SSE3-NEXT: paddq %xmm2, %xmm3
; SSE3-NEXT: movdqa %xmm3, %xmm0
-; SSE3-NEXT: psrlq $1, %xmm0
+; SSE3-NEXT: psrlw $1, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE3-NEXT: psubq %xmm0, %xmm3
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; SSE3-NEXT: psubb %xmm0, %xmm3
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm3, %xmm2
; SSE3-NEXT: pand %xmm0, %xmm2
-; SSE3-NEXT: psrlq $2, %xmm3
+; SSE3-NEXT: psrlw $2, %xmm3
; SSE3-NEXT: pand %xmm0, %xmm3
-; SSE3-NEXT: paddq %xmm2, %xmm3
+; SSE3-NEXT: paddb %xmm2, %xmm3
; SSE3-NEXT: movdqa %xmm3, %xmm0
-; SSE3-NEXT: psrlq $4, %xmm0
-; SSE3-NEXT: paddq %xmm3, %xmm0
+; SSE3-NEXT: psrlw $4, %xmm0
+; SSE3-NEXT: paddb %xmm3, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSE3-NEXT: psadbw %xmm1, %xmm0
; SSE3-NEXT: retq
@@ -386,15 +373,9 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; BITALG_NOVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; BITALG_NOVLX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %xmm2, %xmm0, %xmm3
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %xmm3, %xmm4, %xmm3
-; BITALG_NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpshufb %xmm0, %xmm4, %xmm0
-; BITALG_NOVLX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv2i64u:
@@ -404,14 +385,7 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
; BITALG-NEXT: vpand %xmm2, %xmm0, %xmm0
; BITALG-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; BITALG-NEXT: vpaddq %xmm2, %xmm0, %xmm0
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %xmm2, %xmm0, %xmm3
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %xmm3, %xmm4, %xmm3
-; BITALG-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG-NEXT: vpand %xmm2, %xmm0, %xmm0
-; BITALG-NEXT: vpshufb %xmm0, %xmm4, %xmm0
-; BITALG-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG-NEXT: retq
;
@@ -448,18 +422,18 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
; SSE2-NEXT: paddd %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrld $1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: psubd %xmm0, %xmm3
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
+; SSE2-NEXT: psubb %xmm0, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: psrld $2, %xmm3
+; SSE2-NEXT: psrlw $2, %xmm3
; SSE2-NEXT: pand %xmm0, %xmm3
-; SSE2-NEXT: paddd %xmm2, %xmm3
+; SSE2-NEXT: paddb %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrld $4, %xmm0
-; SSE2-NEXT: paddd %xmm3, %xmm0
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: paddb %xmm3, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
@@ -478,18 +452,18 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; SSE3-NEXT: pcmpeqd %xmm3, %xmm3
; SSE3-NEXT: paddd %xmm2, %xmm3
; SSE3-NEXT: movdqa %xmm3, %xmm0
-; SSE3-NEXT: psrld $1, %xmm0
+; SSE3-NEXT: psrlw $1, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE3-NEXT: psubd %xmm0, %xmm3
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
+; SSE3-NEXT: psubb %xmm0, %xmm3
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm3, %xmm2
; SSE3-NEXT: pand %xmm0, %xmm2
-; SSE3-NEXT: psrld $2, %xmm3
+; SSE3-NEXT: psrlw $2, %xmm3
; SSE3-NEXT: pand %xmm0, %xmm3
-; SSE3-NEXT: paddd %xmm2, %xmm3
+; SSE3-NEXT: paddb %xmm2, %xmm3
; SSE3-NEXT: movdqa %xmm3, %xmm0
-; SSE3-NEXT: psrld $4, %xmm0
-; SSE3-NEXT: paddd %xmm3, %xmm0
+; SSE3-NEXT: psrlw $4, %xmm0
+; SSE3-NEXT: paddb %xmm3, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
@@ -667,19 +641,13 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; BITALG_NOVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; BITALG_NOVLX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %xmm2, %xmm0, %xmm3
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %xmm3, %xmm4, %xmm3
-; BITALG_NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpshufb %xmm0, %xmm4, %xmm0
-; BITALG_NOVLX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv4i32:
@@ -689,14 +657,7 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
; BITALG-NEXT: vpand %xmm2, %xmm0, %xmm0
; BITALG-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; BITALG-NEXT: vpaddd %xmm2, %xmm0, %xmm0
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %xmm2, %xmm0, %xmm3
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %xmm3, %xmm4, %xmm3
-; BITALG-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG-NEXT: vpand %xmm2, %xmm0, %xmm0
-; BITALG-NEXT: vpshufb %xmm0, %xmm4, %xmm0
-; BITALG-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
@@ -742,18 +703,18 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
; SSE2-NEXT: paddd %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrld $1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: psubd %xmm0, %xmm3
-; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
+; SSE2-NEXT: psubb %xmm0, %xmm3
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm3, %xmm2
; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: psrld $2, %xmm3
+; SSE2-NEXT: psrlw $2, %xmm3
; SSE2-NEXT: pand %xmm0, %xmm3
-; SSE2-NEXT: paddd %xmm2, %xmm3
+; SSE2-NEXT: paddb %xmm2, %xmm3
; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrld $4, %xmm0
-; SSE2-NEXT: paddd %xmm3, %xmm0
+; SSE2-NEXT: psrlw $4, %xmm0
+; SSE2-NEXT: paddb %xmm3, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
@@ -772,18 +733,18 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; SSE3-NEXT: pcmpeqd %xmm3, %xmm3
; SSE3-NEXT: paddd %xmm2, %xmm3
; SSE3-NEXT: movdqa %xmm3, %xmm0
-; SSE3-NEXT: psrld $1, %xmm0
+; SSE3-NEXT: psrlw $1, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE3-NEXT: psubd %xmm0, %xmm3
-; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [858993459,858993459,858993459,858993459]
+; SSE3-NEXT: psubb %xmm0, %xmm3
+; SSE3-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm3, %xmm2
; SSE3-NEXT: pand %xmm0, %xmm2
-; SSE3-NEXT: psrld $2, %xmm3
+; SSE3-NEXT: psrlw $2, %xmm3
; SSE3-NEXT: pand %xmm0, %xmm3
-; SSE3-NEXT: paddd %xmm2, %xmm3
+; SSE3-NEXT: paddb %xmm2, %xmm3
; SSE3-NEXT: movdqa %xmm3, %xmm0
-; SSE3-NEXT: psrld $4, %xmm0
-; SSE3-NEXT: paddd %xmm3, %xmm0
+; SSE3-NEXT: psrlw $4, %xmm0
+; SSE3-NEXT: paddb %xmm3, %xmm0
; SSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm1[2],xmm2[3],xmm1[3]
@@ -938,19 +899,13 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; BITALG_NOVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; BITALG_NOVLX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %xmm2, %xmm0, %xmm3
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %xmm3, %xmm4, %xmm3
-; BITALG_NOVLX-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
-; BITALG_NOVLX-NEXT: vpshufb %xmm0, %xmm4, %xmm0
-; BITALG_NOVLX-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
; BITALG_NOVLX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; BITALG_NOVLX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
; BITALG_NOVLX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; BITALG_NOVLX-NEXT: vzeroupper
; BITALG_NOVLX-NEXT: retq
;
; BITALG-LABEL: testv4i32u:
@@ -960,14 +915,7 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
; BITALG-NEXT: vpand %xmm2, %xmm0, %xmm0
; BITALG-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
; BITALG-NEXT: vpaddd %xmm2, %xmm0, %xmm0
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %xmm2, %xmm0, %xmm3
-; BITALG-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %xmm3, %xmm4, %xmm3
-; BITALG-NEXT: vpsrlw $4, %xmm0, %xmm0
-; BITALG-NEXT: vpand %xmm2, %xmm0, %xmm0
-; BITALG-NEXT: vpshufb %xmm0, %xmm4, %xmm0
-; BITALG-NEXT: vpaddb %xmm3, %xmm0, %xmm0
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
; BITALG-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; BITALG-NEXT: vpsadbw %xmm1, %xmm2, %xmm2
; BITALG-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
@@ -1014,16 +962,16 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: psubw %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE2-NEXT: psubb %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddw %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
-; SSE2-NEXT: paddw %xmm0, %xmm1
+; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psllw $8, %xmm0
@@ -1041,16 +989,16 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE3-NEXT: psubw %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE3-NEXT: psubb %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddw %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
-; SSE3-NEXT: paddw %xmm0, %xmm1
+; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: psllw $8, %xmm0
@@ -1210,16 +1158,16 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $1, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: psubw %xmm1, %xmm0
-; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE2-NEXT: psubb %xmm1, %xmm0
+; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psrlw $2, %xmm0
; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: paddw %xmm2, %xmm0
+; SSE2-NEXT: paddb %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrlw $4, %xmm1
-; SSE2-NEXT: paddw %xmm0, %xmm1
+; SSE2-NEXT: paddb %xmm0, %xmm1
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psllw $8, %xmm0
@@ -1237,16 +1185,16 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $1, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE3-NEXT: psubw %xmm1, %xmm0
-; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [13107,13107,13107,13107,13107,13107,13107,13107]
+; SSE3-NEXT: psubb %xmm1, %xmm0
+; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; SSE3-NEXT: movdqa %xmm0, %xmm2
; SSE3-NEXT: pand %xmm1, %xmm2
; SSE3-NEXT: psrlw $2, %xmm0
; SSE3-NEXT: pand %xmm1, %xmm0
-; SSE3-NEXT: paddw %xmm2, %xmm0
+; SSE3-NEXT: paddb %xmm2, %xmm0
; SSE3-NEXT: movdqa %xmm0, %xmm1
; SSE3-NEXT: psrlw $4, %xmm1
-; SSE3-NEXT: paddw %xmm0, %xmm1
+; SSE3-NEXT: paddb %xmm0, %xmm1
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
; SSE3-NEXT: movdqa %xmm1, %xmm0
; SSE3-NEXT: psllw $8, %xmm0
diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-256.ll b/llvm/test/CodeGen/X86/vector-tzcnt-256.ll
index b1173fa4b88..cae0a2d605a 100644
--- a/llvm/test/CodeGen/X86/vector-tzcnt-256.ll
+++ b/llvm/test/CodeGen/X86/vector-tzcnt-256.ll
@@ -124,14 +124,7 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; BITALG_NOVLX-NEXT: vpand %ymm2, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; BITALG_NOVLX-NEXT: vpaddq %ymm2, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %ymm2, %ymm0, %ymm3
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %ymm3, %ymm4, %ymm3
-; BITALG_NOVLX-NEXT: vpsrlw $4, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vpand %ymm2, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vpshufb %ymm0, %ymm4, %ymm0
-; BITALG_NOVLX-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: retq
;
@@ -142,14 +135,7 @@ define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
; BITALG-NEXT: vpand %ymm2, %ymm0, %ymm0
; BITALG-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; BITALG-NEXT: vpaddq %ymm2, %ymm0, %ymm0
-; BITALG-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %ymm2, %ymm0, %ymm3
-; BITALG-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %ymm3, %ymm4, %ymm3
-; BITALG-NEXT: vpsrlw $4, %ymm0, %ymm0
-; BITALG-NEXT: vpand %ymm2, %ymm0, %ymm0
-; BITALG-NEXT: vpshufb %ymm0, %ymm4, %ymm0
-; BITALG-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; BITALG-NEXT: vpopcntb %ymm0, %ymm0
; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG-NEXT: retq
;
@@ -270,14 +256,7 @@ define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
; BITALG_NOVLX-NEXT: vpand %ymm2, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; BITALG_NOVLX-NEXT: vpaddq %ymm2, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %ymm2, %ymm0, %ymm3
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %ymm3, %ymm4, %ymm3
-; BITALG_NOVLX-NEXT: vpsrlw $4, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vpand %ymm2, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vpshufb %ymm0, %ymm4, %ymm0
-; BITALG_NOVLX-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: retq
;
@@ -288,14 +267,7 @@ define <4 x i64> @testv4i64u(<4 x i64> %in) nounwind {
; BITALG-NEXT: vpand %ymm2, %ymm0, %ymm0
; BITALG-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; BITALG-NEXT: vpaddq %ymm2, %ymm0, %ymm0
-; BITALG-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %ymm2, %ymm0, %ymm3
-; BITALG-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %ymm3, %ymm4, %ymm3
-; BITALG-NEXT: vpsrlw $4, %ymm0, %ymm0
-; BITALG-NEXT: vpand %ymm2, %ymm0, %ymm0
-; BITALG-NEXT: vpshufb %ymm0, %ymm4, %ymm0
-; BITALG-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; BITALG-NEXT: vpopcntb %ymm0, %ymm0
; BITALG-NEXT: vpsadbw %ymm1, %ymm0, %ymm0
; BITALG-NEXT: retq
;
@@ -452,14 +424,7 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; BITALG_NOVLX-NEXT: vpand %ymm2, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; BITALG_NOVLX-NEXT: vpaddd %ymm2, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %ymm2, %ymm0, %ymm3
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %ymm3, %ymm4, %ymm3
-; BITALG_NOVLX-NEXT: vpsrlw $4, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vpand %ymm2, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vpshufb %ymm0, %ymm4, %ymm0
-; BITALG_NOVLX-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
@@ -474,14 +439,7 @@ define <8 x i32> @testv8i32(<8 x i32> %in) nounwind {
; BITALG-NEXT: vpand %ymm2, %ymm0, %ymm0
; BITALG-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; BITALG-NEXT: vpaddd %ymm2, %ymm0, %ymm0
-; BITALG-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %ymm2, %ymm0, %ymm3
-; BITALG-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %ymm3, %ymm4, %ymm3
-; BITALG-NEXT: vpsrlw $4, %ymm0, %ymm0
-; BITALG-NEXT: vpand %ymm2, %ymm0, %ymm0
-; BITALG-NEXT: vpshufb %ymm0, %ymm4, %ymm0
-; BITALG-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; BITALG-NEXT: vpopcntb %ymm0, %ymm0
; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
@@ -623,14 +581,7 @@ define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
; BITALG_NOVLX-NEXT: vpand %ymm2, %ymm0, %ymm0
; BITALG_NOVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; BITALG_NOVLX-NEXT: vpaddd %ymm2, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG_NOVLX-NEXT: vpand %ymm2, %ymm0, %ymm3
-; BITALG_NOVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG_NOVLX-NEXT: vpshufb %ymm3, %ymm4, %ymm3
-; BITALG_NOVLX-NEXT: vpsrlw $4, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vpand %ymm2, %ymm0, %ymm0
-; BITALG_NOVLX-NEXT: vpshufb %ymm0, %ymm4, %ymm0
-; BITALG_NOVLX-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
; BITALG_NOVLX-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; BITALG_NOVLX-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; BITALG_NOVLX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
@@ -645,14 +596,7 @@ define <8 x i32> @testv8i32u(<8 x i32> %in) nounwind {
; BITALG-NEXT: vpand %ymm2, %ymm0, %ymm0
; BITALG-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
; BITALG-NEXT: vpaddd %ymm2, %ymm0, %ymm0
-; BITALG-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpand %ymm2, %ymm0, %ymm3
-; BITALG-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %ymm3, %ymm4, %ymm3
-; BITALG-NEXT: vpsrlw $4, %ymm0, %ymm0
-; BITALG-NEXT: vpand %ymm2, %ymm0, %ymm0
-; BITALG-NEXT: vpshufb %ymm0, %ymm4, %ymm0
-; BITALG-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; BITALG-NEXT: vpopcntb %ymm0, %ymm0
; BITALG-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; BITALG-NEXT: vpsadbw %ymm1, %ymm2, %ymm2
; BITALG-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
diff --git a/llvm/test/CodeGen/X86/vector-tzcnt-512.ll b/llvm/test/CodeGen/X86/vector-tzcnt-512.ll
index 37c86f7f81a..4a9fd82593a 100644
--- a/llvm/test/CodeGen/X86/vector-tzcnt-512.ll
+++ b/llvm/test/CodeGen/X86/vector-tzcnt-512.ll
@@ -87,14 +87,7 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; BITALG-NEXT: vpandq %zmm2, %zmm0, %zmm0
; BITALG-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; BITALG-NEXT: vpaddq %zmm2, %zmm0, %zmm0
-; BITALG-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpandq %zmm2, %zmm0, %zmm3
-; BITALG-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %zmm3, %zmm4, %zmm3
-; BITALG-NEXT: vpsrlw $4, %zmm0, %zmm0
-; BITALG-NEXT: vpandq %zmm2, %zmm0, %zmm0
-; BITALG-NEXT: vpshufb %zmm0, %zmm4, %zmm0
-; BITALG-NEXT: vpaddb %zmm3, %zmm0, %zmm0
+; BITALG-NEXT: vpopcntb %zmm0, %zmm0
; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; BITALG-NEXT: retq
%out = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %in, i1 0)
@@ -157,14 +150,7 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind {
; BITALG-NEXT: vpandq %zmm2, %zmm0, %zmm0
; BITALG-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; BITALG-NEXT: vpaddq %zmm2, %zmm0, %zmm0
-; BITALG-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpandq %zmm2, %zmm0, %zmm3
-; BITALG-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %zmm3, %zmm4, %zmm3
-; BITALG-NEXT: vpsrlw $4, %zmm0, %zmm0
-; BITALG-NEXT: vpandq %zmm2, %zmm0, %zmm0
-; BITALG-NEXT: vpshufb %zmm0, %zmm4, %zmm0
-; BITALG-NEXT: vpaddb %zmm3, %zmm0, %zmm0
+; BITALG-NEXT: vpopcntb %zmm0, %zmm0
; BITALG-NEXT: vpsadbw %zmm1, %zmm0, %zmm0
; BITALG-NEXT: retq
%out = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %in, i1 -1)
@@ -269,14 +255,7 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind {
; BITALG-NEXT: vpandq %zmm2, %zmm0, %zmm0
; BITALG-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; BITALG-NEXT: vpaddd %zmm2, %zmm0, %zmm0
-; BITALG-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpandq %zmm2, %zmm0, %zmm3
-; BITALG-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %zmm3, %zmm4, %zmm3
-; BITALG-NEXT: vpsrlw $4, %zmm0, %zmm0
-; BITALG-NEXT: vpandq %zmm2, %zmm0, %zmm0
-; BITALG-NEXT: vpshufb %zmm0, %zmm4, %zmm0
-; BITALG-NEXT: vpaddb %zmm3, %zmm0, %zmm0
+; BITALG-NEXT: vpopcntb %zmm0, %zmm0
; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
; BITALG-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
@@ -347,14 +326,7 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind {
; BITALG-NEXT: vpandq %zmm2, %zmm0, %zmm0
; BITALG-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2
; BITALG-NEXT: vpaddd %zmm2, %zmm0, %zmm0
-; BITALG-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; BITALG-NEXT: vpandq %zmm2, %zmm0, %zmm3
-; BITALG-NEXT: vmovdqa64 {{.*#+}} zmm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; BITALG-NEXT: vpshufb %zmm3, %zmm4, %zmm3
-; BITALG-NEXT: vpsrlw $4, %zmm0, %zmm0
-; BITALG-NEXT: vpandq %zmm2, %zmm0, %zmm0
-; BITALG-NEXT: vpshufb %zmm0, %zmm4, %zmm0
-; BITALG-NEXT: vpaddb %zmm3, %zmm0, %zmm0
+; BITALG-NEXT: vpopcntb %zmm0, %zmm0
; BITALG-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; BITALG-NEXT: vpsadbw %zmm1, %zmm2, %zmm2
; BITALG-NEXT: vpunpckldq
{{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] |
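
Every BITALG hunk above makes the same substitution: vector cttz is still lowered through a byte popcount, but the pshufb nibble-LUT sequence collapses into a single vpopcntb, with vpsadbw against a zero register (plus vpunpckhdq/vpunpckldq for i32 elements) summing the byte counts into each lane. Below is a minimal LLVM IR sketch, not part of the patch, of the identity these tests pin down: cttz(x) = ctpop((x & -x) - 1), which also yields the full bit width for x = 0. The function name is hypothetical.

; Hypothetical illustration: the cttz-as-ctpop form that the hunks above
; lower with vpand(q)/vpaddq/vpopcntb/vpsadbw.
define <8 x i64> @cttz_via_ctpop(<8 x i64> %x) {
  ; 0 - x; the negate presumably sits just above the context shown in the hunks
  %neg = sub <8 x i64> zeroinitializer, %x
  ; x & -x isolates the lowest set bit (the vpand/vpandq context lines)
  %lsb = and <8 x i64> %x, %neg
  ; subtracting 1 turns bit t into t trailing ones (vpaddq with an all-ones
  ; vector built by vpcmpeqd/vpternlogd)
  %ones = add <8 x i64> %lsb, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
  ; popcount of the trailing-ones mask is the trailing-zero count; on BITALG
  ; targets this becomes vpopcntb followed by a vpsadbw horizontal byte sum
  %cnt = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %ones)
  ret <8 x i64> %cnt
}
declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)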