summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/select_const.ll
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2017-08-11 15:44:14 +0000
committerSanjay Patel <spatel@rotateright.com>2017-08-11 15:44:14 +0000
commit169dae70a680cdfa1779148eb9cb643bb76c8b0e (patch)
tree83e08148cec571ed6f42847d9ccc7658a73a0f96 /llvm/test/CodeGen/X86/select_const.ll
parent1fb1ce0c87b1b2c78068488be3f624d3c0cbb19a (diff)
downloadbcm5719-llvm-169dae70a680cdfa1779148eb9cb643bb76c8b0e.tar.gz
bcm5719-llvm-169dae70a680cdfa1779148eb9cb643bb76c8b0e.zip
[x86] use more shift or LEA for select-of-constants (2nd try)
The previous rev (r310208) failed to account for overflow when subtracting the constants to see if they're suitable for shift/lea. This version adds a check for that, and more tests were added in r310490. We can convert any select-of-constants to math ops: http://rise4fun.com/Alive/d7d For this patch, I'm enhancing an existing x86 transform that uses fake multiplies (they always become shl/lea) to avoid cmov or branching. The current code misses cases where we have a negative constant and a positive constant, so this is just trying to plug that hole. The DAGCombiner diff prevents us from hitting a terrible inefficiency: we can start with a select in IR, create a select DAG node, convert it into a sext, convert it back into a select, and then lower it to sext machine code. Some notes about the test diffs: 1. 2010-08-04-MaskedSignedCompare.ll - We were creating control flow that didn't exist in the IR. 2. memcmp.ll - Choosing -1 or 1 is the case that got me looking at this again. We could avoid the push/pop in some cases if we used 'movzbl %al' instead of an xor on a different reg? That's a post-DAG problem though. 3. mul-constant-result.ll - The trade-off between sbb+not vs. setne+neg could be addressed if that's a regression, but those would always be nearly equivalent. 4. pr22338.ll and sext-i1.ll - These tests have undef operands, so we don't actually care about these diffs. 5. sbb.ll - This shows a win for what is likely a common case: choosing -1 or 0. 6. select.ll - There's another borderline case here: cmp+sbb+or vs. test+set+lea? Also, sbb+not vs. setae+neg shows up again. 7. select_const.ll - These are motivating cases for the enhancement; replace cmov with cheaper ops. Assembly differences between movzbl and xor to avoid a partial reg stall are caused later by the X86 Fixup SetCC pass. Differential Revision: https://reviews.llvm.org/D35340 llvm-svn: 310717
Diffstat (limited to 'llvm/test/CodeGen/X86/select_const.ll')
-rw-r--r--llvm/test/CodeGen/X86/select_const.ll100
1 file changed, 47 insertions(+), 53 deletions(-)
diff --git a/llvm/test/CodeGen/X86/select_const.ll b/llvm/test/CodeGen/X86/select_const.ll
index b439d26b286..7d13cc7b788 100644
--- a/llvm/test/CodeGen/X86/select_const.ll
+++ b/llvm/test/CodeGen/X86/select_const.ll
@@ -211,10 +211,9 @@ define i32 @select_C_Cplus1_signext(i1 signext %cond) {
define i32 @select_lea_2(i1 zeroext %cond) {
; CHECK-LABEL: select_lea_2:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $-1, %ecx
-; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: leal -1(%rax,%rax), %eax
; CHECK-NEXT: retq
%sel = select i1 %cond, i32 -1, i32 1
ret i32 %sel
@@ -223,10 +222,9 @@ define i32 @select_lea_2(i1 zeroext %cond) {
define i64 @select_lea_3(i1 zeroext %cond) {
; CHECK-LABEL: select_lea_3:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $1, %ecx
-; CHECK-NEXT: movq $-2, %rax
-; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: leaq -2(%rax,%rax,2), %rax
; CHECK-NEXT: retq
%sel = select i1 %cond, i64 -2, i64 1
ret i64 %sel
@@ -235,10 +233,9 @@ define i64 @select_lea_3(i1 zeroext %cond) {
define i32 @select_lea_5(i1 zeroext %cond) {
; CHECK-LABEL: select_lea_5:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $-2, %ecx
-; CHECK-NEXT: movl $3, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: leal -2(%rax,%rax,4), %eax
; CHECK-NEXT: retq
%sel = select i1 %cond, i32 -2, i32 3
ret i32 %sel
@@ -247,10 +244,9 @@ define i32 @select_lea_5(i1 zeroext %cond) {
define i64 @select_lea_9(i1 zeroext %cond) {
; CHECK-LABEL: select_lea_9:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $2, %ecx
-; CHECK-NEXT: movq $-7, %rax
-; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: leaq -7(%rax,%rax,8), %rax
; CHECK-NEXT: retq
%sel = select i1 %cond, i64 -7, i64 2
ret i64 %sel
@@ -262,8 +258,8 @@ define i64 @sel_1_2(i64 %x, i64 %y) {
; CHECK-LABEL: sel_1_2:
; CHECK: # BB#0:
; CHECK-NEXT: cmpq $42, %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: leaq 2(%rax,%rsi), %rax
+; CHECK-NEXT: sbbq $0, %rsi
+; CHECK-NEXT: leaq 2(%rsi), %rax
; CHECK-NEXT: retq
%cmp = icmp ult i64 %x, 42
%sel = select i1 %cmp, i64 1, i64 2
@@ -271,30 +267,31 @@ define i64 @sel_1_2(i64 %x, i64 %y) {
ret i64 %sub
}
-; No LEA with 8-bit or 16-bit, but this shouldn't need branches or cmov.
+; No LEA with 8-bit, but this shouldn't need branches or cmov.
define i8 @sel_1_neg1(i32 %x) {
; CHECK-LABEL: sel_1_neg1:
; CHECK: # BB#0:
; CHECK-NEXT: cmpl $42, %edi
-; CHECK-NEXT: movb $3, %al
-; CHECK-NEXT: jg .LBB23_2
-; CHECK-NEXT: # BB#1:
-; CHECK-NEXT: movb $-1, %al
-; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: setg %al
+; CHECK-NEXT: shlb $2, %al
+; CHECK-NEXT: decb %al
; CHECK-NEXT: retq
%cmp = icmp sgt i32 %x, 42
%sel = select i1 %cmp, i8 3, i8 -1
ret i8 %sel
}
+; We get an LEA for 16-bit because we ignore the high-bits.
+
define i16 @sel_neg1_1(i32 %x) {
; CHECK-LABEL: sel_neg1_1:
; CHECK: # BB#0:
-; CHECK-NEXT: cmpl $42, %edi
-; CHECK-NEXT: movw $-1, %cx
-; CHECK-NEXT: movw $3, %ax
-; CHECK-NEXT: cmovgw %cx, %ax
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl $43, %edi
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: leal -1(,%rax,4), %eax
+; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%cmp = icmp sgt i32 %x, 42
%sel = select i1 %cmp, i16 -1, i16 3
@@ -306,10 +303,10 @@ define i16 @sel_neg1_1(i32 %x) {
define i32 @sel_1_neg1_32(i32 %x) {
; CHECK-LABEL: sel_1_neg1_32:
; CHECK: # BB#0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $42, %edi
-; CHECK-NEXT: movl $8, %ecx
-; CHECK-NEXT: movl $-1, %eax
-; CHECK-NEXT: cmovgl %ecx, %eax
+; CHECK-NEXT: setg %al
+; CHECK-NEXT: leal -1(%rax,%rax,8), %eax
; CHECK-NEXT: retq
%cmp = icmp sgt i32 %x, 42
%sel = select i1 %cmp, i32 8, i32 -1
@@ -319,10 +316,10 @@ define i32 @sel_1_neg1_32(i32 %x) {
define i32 @sel_neg1_1_32(i32 %x) {
; CHECK-LABEL: sel_neg1_1_32:
; CHECK: # BB#0:
-; CHECK-NEXT: cmpl $42, %edi
-; CHECK-NEXT: movl $-7, %ecx
-; CHECK-NEXT: movl $2, %eax
-; CHECK-NEXT: cmovgl %ecx, %eax
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl $43, %edi
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: leal -7(%rax,%rax,8), %eax
; CHECK-NEXT: retq
%cmp = icmp sgt i32 %x, 42
%sel = select i1 %cmp, i32 -7, i32 2
@@ -336,12 +333,9 @@ define i32 @sel_neg1_1_32(i32 %x) {
define i8 @select_pow2_diff(i1 zeroext %cond) {
; CHECK-LABEL: select_pow2_diff:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movb $19, %al
-; CHECK-NEXT: jne .LBB27_2
-; CHECK-NEXT: # BB#1:
-; CHECK-NEXT: movb $3, %al
-; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: shlb $4, %dil
+; CHECK-NEXT: orb $3, %dil
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%sel = select i1 %cond, i8 19, i8 3
ret i8 %sel
@@ -350,10 +344,11 @@ define i8 @select_pow2_diff(i1 zeroext %cond) {
define i16 @select_pow2_diff_invert(i1 zeroext %cond) {
; CHECK-LABEL: select_pow2_diff_invert:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movw $7, %cx
-; CHECK-NEXT: movw $71, %ax
-; CHECK-NEXT: cmovnew %cx, %ax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: shll $6, %eax
+; CHECK-NEXT: orl $7, %eax
+; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%sel = select i1 %cond, i16 7, i16 71
ret i16 %sel
@@ -362,10 +357,9 @@ define i16 @select_pow2_diff_invert(i1 zeroext %cond) {
define i32 @select_pow2_diff_neg(i1 zeroext %cond) {
; CHECK-LABEL: select_pow2_diff_neg:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $-9, %ecx
-; CHECK-NEXT: movl $-25, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: shlb $4, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: orl $-25, %eax
; CHECK-NEXT: retq
%sel = select i1 %cond, i32 -9, i32 -25
ret i32 %sel
@@ -374,10 +368,10 @@ define i32 @select_pow2_diff_neg(i1 zeroext %cond) {
define i64 @select_pow2_diff_neg_invert(i1 zeroext %cond) {
; CHECK-LABEL: select_pow2_diff_neg_invert:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $29, %ecx
-; CHECK-NEXT: movq $-99, %rax
-; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: shlq $7, %rax
+; CHECK-NEXT: addq $-99, %rax
; CHECK-NEXT: retq
%sel = select i1 %cond, i64 -99, i64 29
ret i64 %sel
OpenPOWER on IntegriCloud