author    Sanjay Patel <spatel@rotateright.com>  2017-08-06 16:27:07 +0000
committer Sanjay Patel <spatel@rotateright.com>  2017-08-06 16:27:07 +0000
commit    a923c2ee95a4b3b6d43a850789ba56c6aa249b3c (patch)
tree      be69b92b4063e1ba19c60cf82a2611f084c21a2f /llvm/test/CodeGen/X86/memcmp.ll
parent    a9b5bbac789a69322ec62011bcb8e8462a097e59 (diff)
download  bcm5719-llvm-a923c2ee95a4b3b6d43a850789ba56c6aa249b3c.tar.gz
          bcm5719-llvm-a923c2ee95a4b3b6d43a850789ba56c6aa249b3c.zip
[x86] use more shift or LEA for select-of-constants
We can convert any select-of-constants to math ops:
http://rise4fun.com/Alive/d7d

For this patch, I'm enhancing an existing x86 transform that uses fake multiplies (they always become shl/lea) to avoid cmov or branching. The current code misses cases where we have a negative constant and a positive constant, so this is just trying to plug that hole.

The DAGCombiner diff prevents us from hitting a terrible inefficiency: we can start with a select in IR, create a select DAG node, convert it into a sext, convert it back into a select, and then lower it to sext machine code.

Some notes about the test diffs:

1. 2010-08-04-MaskedSignedCompare.ll - We were creating control flow that didn't exist in the IR.
2. memcmp.ll - Choosing -1 or 1 is the case that got me looking at this again. I think we could avoid the push/pop in some cases if we used 'movzbl %al' instead of an xor on a different reg? That's a post-DAG problem though.
3. mul-constant-result.ll - The trade-off between sbb+not vs. setne+neg could be addressed if that's a regression, but I think those would always be nearly equivalent.
4. pr22338.ll and sext-i1.ll - These tests have undef operands, so I don't think we actually care about these diffs.
5. sbb.ll - This shows a win for what I think is a common case: choosing -1 or 0.
6. select.ll - There's another borderline case here: cmp+sbb+or vs. test+set+lea? Also, sbb+not vs. setae+neg shows up again.
7. select_const.ll - These are motivating cases for the enhancement: replace cmov with cheaper ops.

Assembly differences between movzbl and xor to avoid a partial reg stall are caused later by the X86 Fixup SetCC pass.

Differential Revision: https://reviews.llvm.org/D35340

llvm-svn: 310208
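To make the memcmp.ll diffs below concrete, here is a minimal sketch of the pattern this patch improves; the function name is hypothetical and not taken from the tests. A select between -1 and 1 on an unsigned compare:

  define i32 @sel_neg1_or_1(i32 %x, i32 %y) {
    %cmp = icmp ult i32 %x, %y            ; lowers to cmp + a carry-flag read
    %sel = select i1 %cmp, i32 -1, i32 1  ; select of two constants
    ret i32 %sel
  }

can be rewritten as branch-free math, 2 * zext(!%cmp) - 1:

    %not = xor i1 %cmp, true
    %z   = zext i1 %not to i32
    %dbl = shl i32 %z, 1                  ; the "fake multiply" (always shl/lea)
    %res = add i32 %dbl, -1               ; 0 -> -1, 1 -> 1

which is what the setae/movzbl/leal sequences in the hunks below implement.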
Diffstat (limited to 'llvm/test/CodeGen/X86/memcmp.ll')
-rw-r--r--  llvm/test/CodeGen/X86/memcmp.ll  |  42
1 file changed, 21 insertions(+), 21 deletions(-)
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 020a9c0c333..a269529b646 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -126,9 +126,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
; X86-NEXT: popl %esi
; X86-NEXT: retl
; X86-NEXT: .LBB4_1: # %res_block
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -146,9 +146,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB4_1: # %res_block
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
ret i32 %m
@@ -283,9 +283,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind {
; X86-NEXT: popl %esi
; X86-NEXT: retl
; X86-NEXT: .LBB9_1: # %res_block
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -303,9 +303,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind {
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_1: # %res_block
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
ret i32 %m
@@ -376,10 +376,10 @@ define i32 @length8(i8* %X, i8* %Y) nounwind {
; X86-NEXT: popl %esi
; X86-NEXT: retl
; X86-NEXT: .LBB11_1: # %res_block
+; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -521,10 +521,10 @@ define i32 @length12(i8* %X, i8* %Y) nounwind {
; X64-NEXT: # BB#3: # %endblock
; X64-NEXT: retq
; X64-NEXT: .LBB15_1: # %res_block
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
ret i32 %m
@@ -562,10 +562,10 @@ define i32 @length16(i8* %X, i8* %Y) nounwind {
; X64-NEXT: # BB#3: # %endblock
; X64-NEXT: retq
; X64-NEXT: .LBB16_1: # %res_block
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
ret i32 %m
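For reference, an annotated copy of the replacement sequence that appears in the hunks above (comments are mine; register widths follow the X64 version):

  setae %al                  ; al = (lhs >= rhs) ? 1 : 0, read from the cmp's carry flag
  movzbl %al, %eax           ; zero-extend so the upper bits of eax are defined
  leal -1(%rax,%rax), %eax   ; eax = 2*eax - 1: below -> -1, otherwise -> 1

This computes exactly the -1/1 values the old movl/movl/cmovbl sequence selected, without a conditional move.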