summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/X86/vec_ctbits.ll
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-08-04 10:14:39 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-08-04 10:14:39 +0000
commit: 8ae6dad49b4440641073a2ef4001875ebf406352 (patch)
tree: 983d87fce96ccad8f54a741f3f53dd487d635dc9 /llvm/test/CodeGen/X86/vec_ctbits.ll
parent: 87e6d99487ddd7ba4bdc582600f0dfc2ef271ae7 (diff)
download: bcm5719-llvm-8ae6dad49b4440641073a2ef4001875ebf406352.tar.gz
bcm5719-llvm-8ae6dad49b4440641073a2ef4001875ebf406352.zip
[X86][SSE] Don't decide when to scalarize CTTZ/CTLZ for performance at lowering - this is what cost models are for
Improved CTTZ/CTLZ costings will be added shortly.
llvm-svn: 277713
Diffstat (limited to 'llvm/test/CodeGen/X86/vec_ctbits.ll')
-rw-r--r-- llvm/test/CodeGen/X86/vec_ctbits.ll 61
1 files changed, 40 insertions, 21 deletions
diff --git a/llvm/test/CodeGen/X86/vec_ctbits.ll b/llvm/test/CodeGen/X86/vec_ctbits.ll
index e151317c658..23f70db586f 100644
--- a/llvm/test/CodeGen/X86/vec_ctbits.ll
+++ b/llvm/test/CodeGen/X86/vec_ctbits.ll
@@ -8,15 +8,26 @@ declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
define <2 x i64> @footz(<2 x i64> %a) nounwind {
; CHECK-LABEL: footz:
; CHECK: # BB#0:
-; CHECK-NEXT: movd %xmm0, %rax
-; CHECK-NEXT: bsfq %rax, %rax
-; CHECK-NEXT: movd %rax, %xmm1
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: movd %xmm0, %rax
-; CHECK-NEXT: bsfq %rax, %rax
-; CHECK-NEXT: movd %rax, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: psubq %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm0, %xmm2
+; CHECK-NEXT: psubq {{.*}}(%rip), %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: psrlq $1, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: psubq %xmm0, %xmm2
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pand %xmm0, %xmm3
+; CHECK-NEXT: psrlq $2, %xmm2
+; CHECK-NEXT: pand %xmm0, %xmm2
+; CHECK-NEXT: paddq %xmm3, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: psrlq $4, %xmm0
+; CHECK-NEXT: paddq %xmm2, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: psadbw %xmm1, %xmm0
; CHECK-NEXT: retq
%c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
ret <2 x i64> %c
@@ -75,18 +86,26 @@ define <2 x i32> @promtz(<2 x i32> %a) nounwind {
; CHECK-LABEL: promtz:
; CHECK: # BB#0:
; CHECK-NEXT: por {{.*}}(%rip), %xmm0
-; CHECK-NEXT: movd %xmm0, %rax
-; CHECK-NEXT: bsfq %rax, %rax
-; CHECK-NEXT: movl $64, %ecx
-; CHECK-NEXT: cmoveq %rcx, %rax
-; CHECK-NEXT: movd %rax, %xmm1
-; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; CHECK-NEXT: movd %xmm0, %rax
-; CHECK-NEXT: bsfq %rax, %rax
-; CHECK-NEXT: cmoveq %rcx, %rax
-; CHECK-NEXT: movd %rax, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: pxor %xmm2, %xmm2
+; CHECK-NEXT: psubq %xmm0, %xmm2
+; CHECK-NEXT: pand %xmm0, %xmm2
+; CHECK-NEXT: psubq {{.*}}(%rip), %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: psrlq $1, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: psubq %xmm0, %xmm2
+; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [3689348814741910323,3689348814741910323]
+; CHECK-NEXT: movdqa %xmm2, %xmm3
+; CHECK-NEXT: pand %xmm0, %xmm3
+; CHECK-NEXT: psrlq $2, %xmm2
+; CHECK-NEXT: pand %xmm0, %xmm2
+; CHECK-NEXT: paddq %xmm3, %xmm2
+; CHECK-NEXT: movdqa %xmm2, %xmm0
+; CHECK-NEXT: psrlq $4, %xmm0
+; CHECK-NEXT: paddq %xmm2, %xmm0
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: psadbw %xmm1, %xmm0
; CHECK-NEXT: retq
%c = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
ret <2 x i32> %c
OpenPOWER on IntegriCloud