diff options
author | Sanjay Patel <spatel@rotateright.com> | 2017-08-11 15:44:14 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2017-08-11 15:44:14 +0000 |
commit | 169dae70a680cdfa1779148eb9cb643bb76c8b0e (patch) | |
tree | 83e08148cec571ed6f42847d9ccc7658a73a0f96 /llvm/test/CodeGen/X86/memcmp.ll | |
parent | 1fb1ce0c87b1b2c78068488be3f624d3c0cbb19a (diff) | |
download | bcm5719-llvm-169dae70a680cdfa1779148eb9cb643bb76c8b0e.tar.gz bcm5719-llvm-169dae70a680cdfa1779148eb9cb643bb76c8b0e.zip |
[x86] use more shift or LEA for select-of-constants (2nd try)
The previous rev (r310208) failed to account for overflow when subtracting the
constants to see if they're suitable for shift/lea. This version adds a check
for that, and more tests were added in r310490.
We can convert any select-of-constants to math ops:
http://rise4fun.com/Alive/d7d
For this patch, I'm enhancing an existing x86 transform that uses fake multiplies
(they always become shl/lea) to avoid cmov or branching. The current code misses
cases where we have a negative constant and a positive constant, so this is just
trying to plug that hole.
The DAGCombiner diff prevents us from hitting a terrible inefficiency: we can start
with a select in IR, create a select DAG node, convert it into a sext, convert it
back into a select, and then lower it to sext machine code.
Some notes about the test diffs:
1. 2010-08-04-MaskedSignedCompare.ll - We were creating control flow that didn't exist in the IR.
2. memcmp.ll - Choose -1 or 1 is the case that got me looking at this again. We could avoid the
push/pop in some cases if we used 'movzbl %al' instead of an xor on a different reg? That's a
post-DAG problem though.
3. mul-constant-result.ll - The trade-off between sbb+not vs. setne+neg could be addressed if
that's a regression, but those would always be nearly equivalent.
4. pr22338.ll and sext-i1.ll - These tests have undef operands, so we don't actually care about these diffs.
5. sbb.ll - This shows a win for what is likely a common case: choose -1 or 0.
6. select.ll - There's another borderline case here: cmp+sbb+or vs. test+set+lea? Also, sbb+not vs. setae+neg shows up again.
7. select_const.ll - These are motivating cases for the enhancement; replace cmov with cheaper ops.
Assembly differences between movzbl and xor to avoid a partial reg stall are caused later by the X86 Fixup SetCC pass.
Differential Revision: https://reviews.llvm.org/D35340
llvm-svn: 310717
Diffstat (limited to 'llvm/test/CodeGen/X86/memcmp.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/memcmp.ll | 42 |
1 files changed, 21 insertions, 21 deletions
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 020a9c0c333..a269529b646 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -126,9 +126,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ; X86-NEXT: .LBB4_1: # %res_block
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ;
@@ -146,9 +146,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: retq
 ; X64-NEXT: .LBB4_1: # %res_block
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax,%rax), %eax
 ; X64-NEXT: retq
 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
 ret i32 %m
@@ -283,9 +283,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind {
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ; X86-NEXT: .LBB9_1: # %res_block
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ;
@@ -303,9 +303,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind {
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: retq
 ; X64-NEXT: .LBB9_1: # %res_block
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax,%rax), %eax
 ; X64-NEXT: retq
 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
 ret i32 %m
@@ -376,10 +376,10 @@ define i32 @length8(i8* %X, i8* %Y) nounwind {
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ; X86-NEXT: .LBB11_1: # %res_block
+; X86-NEXT: xorl %eax, %eax
 ; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: leal -1(%eax,%eax), %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ;
@@ -521,10 +521,10 @@ define i32 @length12(i8* %X, i8* %Y) nounwind {
 ; X64-NEXT: # BB#3: # %endblock
 ; X64-NEXT: retq
 ; X64-NEXT: .LBB15_1: # %res_block
+; X64-NEXT: xorl %eax, %eax
 ; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
 ; X64-NEXT: retq
 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
 ret i32 %m
@@ -562,10 +562,10 @@ define i32 @length16(i8* %X, i8* %Y) nounwind {
 ; X64-NEXT: # BB#3: # %endblock
 ; X64-NEXT: retq
 ; X64-NEXT: .LBB16_1: # %res_block
+; X64-NEXT: xorl %eax, %eax
 ; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
 ; X64-NEXT: retq
 %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
 ret i32 %m