author    Sanjay Patel <spatel@rotateright.com>  2017-07-31 18:08:24 +0000
committer Sanjay Patel <spatel@rotateright.com>  2017-07-31 18:08:24 +0000
commit    fea731a4aa6aabf270fbb9ba6401ca8826c55a9b (patch)
tree      93d0d7e6532ef8f3b297548da5097f879e878312 /llvm/test/CodeGen/X86/memcmp.ll
parent    70d35e102ef8dbba10e2db84ea2dcbe95bbbfd38 (diff)
[CGP] use subtract or subtract-of-cmps for result of memcmp expansion
As noted in the code comment, transforming this in the other direction might require a separate transform here in CGP given the block-at-a-time DAG constraint. Besides that theoretical motivation, there are 2 practical motivations for the subtract-of-cmps form:

1. The codegen for both x86 and PPC is better for this IR (though PPC could be better still). There is discussion about canonicalizing IR to the select form ( http://lists.llvm.org/pipermail/llvm-dev/2017-July/114885.html ), so we probably need to add DAG transforms for those patterns anyway, but this improves the memcmp output without waiting for that step.

2. If we allow vector-sized chunks for the load and compare, x86 is better prepared to convert that to optimal code when using subtract-of-cmps, so another prerequisite patch is avoided if we choose to enable that.

Differential Revision: https://reviews.llvm.org/D34904

llvm-svn: 309597
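For illustration, here is a minimal LLVM IR sketch of the two result forms for a 4-byte expansion (hypothetical function and value names, not code from the patch). The select form corresponds to the cmov sequences removed in the diff below; the subtract-of-cmps form, (a > b) - (a < b), corresponds to the seta/sbb sequences added.

declare i32 @llvm.bswap.i32(i32)

; Select form (before this patch): two selects pick -1/1/0,
; which lowers to the cmovb/cmove pairs seen in the old checks.
define i32 @memcmp4_select(i8* %X, i8* %Y) {
  %px = bitcast i8* %X to i32*
  %py = bitcast i8* %Y to i32*
  %lx = load i32, i32* %px
  %ly = load i32, i32* %py
  %bx = call i32 @llvm.bswap.i32(i32 %lx)  ; byte-swap so an unsigned
  %by = call i32 @llvm.bswap.i32(i32 %ly)  ; compare gives memcmp order
  %lt = icmp ult i32 %bx, %by
  %eq = icmp eq i32 %bx, %by
  %sel = select i1 %lt, i32 -1, i32 1
  %r = select i1 %eq, i32 0, i32 %sel
  ret i32 %r
}

; Subtract-of-cmps form (after this patch): (bx > by) - (bx < by),
; which x86 lowers to cmp + seta + sbb as in the new checks.
define i32 @memcmp4_sub(i8* %X, i8* %Y) {
  %px = bitcast i8* %X to i32*
  %py = bitcast i8* %Y to i32*
  %lx = load i32, i32* %px
  %ly = load i32, i32* %py
  %bx = call i32 @llvm.bswap.i32(i32 %lx)
  %by = call i32 @llvm.bswap.i32(i32 %ly)
  %gt = icmp ugt i32 %bx, %by
  %lt = icmp ult i32 %bx, %by
  %zgt = zext i1 %gt to i32
  %zlt = zext i1 %lt to i32
  %r = sub i32 %zgt, %zlt
  ret i32 %r
}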
Diffstat (limited to 'llvm/test/CodeGen/X86/memcmp.ll')
-rw-r--r--  llvm/test/CodeGen/X86/memcmp.ll  68
1 file changed, 28 insertions(+), 40 deletions(-)
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 889f6a74bf7..409bd7147c5 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -17,15 +17,12 @@ define i32 @length2(i8* %X, i8* %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %eax
+; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpw %ax, %cx
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
-; X86-NEXT: cmovel %edx, %eax
+; X86-NEXT: rolw $8, %dx
+; X86-NEXT: movzwl %cx, %eax
+; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: length2:
@@ -34,12 +31,9 @@ define i32 @length2(i8* %X, i8* %Y) nounwind {
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpw %cx, %ax
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
-; X64-NEXT: cmovel %edx, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
ret i32 %m
@@ -211,29 +205,25 @@ define i32 @length4(i8* %X, i8* %Y) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %eax
+; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpl %eax, %ecx
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
-; X86-NEXT: cmovel %edx, %eax
+; X86-NEXT: bswapl %edx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: cmpl %edx, %ecx
+; X86-NEXT: seta %al
+; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl
;
; X64-LABEL: length4:
; X64: # BB#0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl (%rsi), %ecx
-; X64-NEXT: bswapl %eax
+; X64-NEXT: movl (%rdi), %ecx
+; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
-; X64-NEXT: cmovel %edx, %eax
+; X64-NEXT: bswapl %edx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl %edx, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
ret i32 %m
@@ -399,16 +389,14 @@ define i32 @length8(i8* %X, i8* %Y) nounwind {
;
; X64-LABEL: length8:
; X64: # BB#0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq (%rsi), %rcx
-; X64-NEXT: bswapq %rax
+; X64-NEXT: movq (%rdi), %rcx
+; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
-; X64-NEXT: cmovel %edx, %eax
+; X64-NEXT: bswapq %rdx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpq %rdx, %rcx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
ret i32 %m
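Note that the 2-byte case above needs no compares at all: after zero-extending the byte-swapped i16 values to i32, a plain subtract cannot wrap, so its signed result already orders the operands (hence the movzwl/subl sequence in the length2 hunk). A sketch of that plain-subtract form, again with hypothetical names rather than code from the patch:

declare i16 @llvm.bswap.i16(i16)

define i32 @memcmp2_sub(i8* %X, i8* %Y) {
  %px = bitcast i8* %X to i16*
  %py = bitcast i8* %Y to i16*
  %lx = load i16, i16* %px
  %ly = load i16, i16* %py
  %bx = call i16 @llvm.bswap.i16(i16 %lx)
  %by = call i16 @llvm.bswap.i16(i16 %ly)
  %zx = zext i16 %bx to i32   ; both values fit in [0, 65535], so the
  %zy = zext i16 %by to i32   ; i32 subtraction below cannot overflow
  %r = sub i32 %zx, %zy       ; sign of %r gives the memcmp ordering
  ret i32 %r
}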