| author | Sanjay Patel <spatel@rotateright.com> | 2017-07-31 18:08:24 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2017-07-31 18:08:24 +0000 |
| commit | fea731a4aa6aabf270fbb9ba6401ca8826c55a9b (patch) | |
| tree | 93d0d7e6532ef8f3b297548da5097f879e878312 /llvm/test/CodeGen/X86/memcmp.ll | |
| parent | 70d35e102ef8dbba10e2db84ea2dcbe95bbbfd38 (diff) | |
[CGP] use subtract or subtract-of-cmps for result of memcmp expansion
As noted in the code comment, transforming this in the other direction might require
a separate transform here in CGP given the block-at-a-time DAG constraint.
Besides that theoretical motivation, there are two practical motivations for the
subtract-of-cmps form:
1. The codegen for both x86 and PPC is better for this IR (though PPC could be better still).
There is discussion about canonicalizing IR to the select form
( http://lists.llvm.org/pipermail/llvm-dev/2017-July/114885.html ),
so we probably need to add DAG transforms for those patterns anyway, but this improves the
memcmp output without waiting for that step.
2. If we allow vector-sized chunks for the load and compare, x86 is better prepared to convert
that to optimal code when using subtract-of-cmps, so another prerequisite patch is avoided
if we choose to enable that.
Differential Revision: https://reviews.llvm.org/D34904
llvm-svn: 309597
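
To make the difference concrete, the two shapes being compared look roughly like the following for one loaded, byte-swapped chunk. This is a minimal illustrative sketch; the function names and value names are hypothetical and not the exact IR emitted by the expansion:

```llvm
; Select form: a == b ? 0 : (a < b ? -1 : 1)
define i32 @cmp_chunk_select(i32 %a, i32 %b) {
  %lt  = icmp ult i32 %a, %b
  %neg = select i1 %lt, i32 -1, i32 1
  %eq  = icmp eq i32 %a, %b
  %r   = select i1 %eq, i32 0, i32 %neg
  ret i32 %r
}

; Subtract-of-cmps form: zext(a > b) - zext(a < b)
define i32 @cmp_chunk_sub_of_cmps(i32 %a, i32 %b) {
  %gt  = icmp ugt i32 %a, %b
  %lt  = icmp ult i32 %a, %b
  %zgt = zext i1 %gt to i32
  %zlt = zext i1 %lt to i32
  %r   = sub i32 %zgt, %zlt
  ret i32 %r
}
```

On x86, the subtract-of-cmps form lowers to a compare followed by `seta`/`sbb` rather than a chain of `cmov` instructions, which is what the updated test checks below verify.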
Diffstat (limited to 'llvm/test/CodeGen/X86/memcmp.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/memcmp.ll | 68 |
1 file changed, 28 insertions(+), 40 deletions(-)
```diff
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 889f6a74bf7..409bd7147c5 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -17,15 +17,12 @@ define i32 @length2(i8* %X, i8* %Y) nounwind {
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movzwl (%ecx), %ecx
-; X86-NEXT: movzwl (%eax), %eax
+; X86-NEXT: movzwl (%eax), %edx
 ; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %ax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpw %ax, %cx
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
-; X86-NEXT: cmovel %edx, %eax
+; X86-NEXT: rolw $8, %dx
+; X86-NEXT: movzwl %cx, %eax
+; X86-NEXT: movzwl %dx, %ecx
+; X86-NEXT: subl %ecx, %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length2:
@@ -34,12 +31,9 @@ define i32 @length2(i8* %X, i8* %Y) nounwind {
 ; X64-NEXT: movzwl (%rsi), %ecx
 ; X64-NEXT: rolw $8, %ax
 ; X64-NEXT: rolw $8, %cx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpw %cx, %ax
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
-; X64-NEXT: cmovel %edx, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: movzwl %cx, %ecx
+; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
   ret i32 %m
@@ -211,29 +205,25 @@ define i32 @length4(i8* %X, i8* %Y) nounwind {
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %eax
+; X86-NEXT: movl (%eax), %edx
 ; X86-NEXT: bswapl %ecx
-; X86-NEXT: bswapl %eax
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpl %eax, %ecx
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
-; X86-NEXT: cmovel %edx, %eax
+; X86-NEXT: bswapl %edx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: cmpl %edx, %ecx
+; X86-NEXT: seta %al
+; X86-NEXT: sbbl $0, %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length4:
 ; X64: # BB#0:
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl (%rsi), %ecx
-; X64-NEXT: bswapl %eax
+; X64-NEXT: movl (%rdi), %ecx
+; X64-NEXT: movl (%rsi), %edx
 ; X64-NEXT: bswapl %ecx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
-; X64-NEXT: cmovel %edx, %eax
+; X64-NEXT: bswapl %edx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl %edx, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbl $0, %eax
 ; X64-NEXT: retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
   ret i32 %m
@@ -399,16 +389,14 @@ define i32 @length8(i8* %X, i8* %Y) nounwind {
 ;
 ; X64-LABEL: length8:
 ; X64: # BB#0:
-; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq (%rsi), %rcx
-; X64-NEXT: bswapq %rax
+; X64-NEXT: movq (%rdi), %rcx
+; X64-NEXT: movq (%rsi), %rdx
 ; X64-NEXT: bswapq %rcx
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: cmpq %rcx, %rax
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
-; X64-NEXT: cmovel %edx, %eax
+; X64-NEXT: bswapq %rdx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpq %rdx, %rcx
+; X64-NEXT: seta %al
+; X64-NEXT: sbbl $0, %eax
 ; X64-NEXT: retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
   ret i32 %m
```
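
In the 4- and 8-byte cases above, the new `cmp`/`seta`/`sbbl $0` sequence computes (a > b) - (a < b): `seta` materializes the greater-than bit and `sbb` subtracts the carry flag, which holds the less-than bit from the same compare. The length-2 hunks show the other half of the patch title: when the zero-extended chunks are narrow enough that their i32 difference cannot overflow, a plain subtract of the swapped values already yields a correctly signed result. A minimal sketch of that shape, with a hypothetical function name:

```llvm
; Plain-subtract form for small chunks: the i16 values are zero-extended,
; so the i32 difference cannot wrap and its sign matches the comparison.
define i32 @cmp_small_chunk_sub(i16 %a, i16 %b) {
  %za = zext i16 %a to i32
  %zb = zext i16 %b to i32
  %r  = sub i32 %za, %zb
  ret i32 %r
}
```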

