summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2017-08-11 15:44:14 +0000
committerSanjay Patel <spatel@rotateright.com>2017-08-11 15:44:14 +0000
commit169dae70a680cdfa1779148eb9cb643bb76c8b0e (patch)
tree83e08148cec571ed6f42847d9ccc7658a73a0f96 /llvm/test/CodeGen
parent1fb1ce0c87b1b2c78068488be3f624d3c0cbb19a (diff)
downloadbcm5719-llvm-169dae70a680cdfa1779148eb9cb643bb76c8b0e.tar.gz
bcm5719-llvm-169dae70a680cdfa1779148eb9cb643bb76c8b0e.zip
[x86] use more shift or LEA for select-of-constants (2nd try)
The previous rev (r310208) failed to account for overflow when subtracting the constants to see if they're suitable for shift/lea. This version adds a check for that and more tests were added in r310490. We can convert any select-of-constants to math ops: http://rise4fun.com/Alive/d7d For this patch, I'm enhancing an existing x86 transform that uses fake multiplies (they always become shl/lea) to avoid cmov or branching. The current code misses cases where we have a negative constant and a positive constant, so this is just trying to plug that hole. The DAGCombiner diff prevents us from hitting a terrible inefficiency: we can start with a select in IR, create a select DAG node, convert it into a sext, convert it back into a select, and then lower it to sext machine code. Some notes about the test diffs: 1. 2010-08-04-MaskedSignedCompare.ll - We were creating control flow that didn't exist in the IR. 2. memcmp.ll - Choosing -1 or 1 is the case that got me looking at this again. We could avoid the push/pop in some cases if we used 'movzbl %al' instead of an xor on a different reg? That's a post-DAG problem though. 3. mul-constant-result.ll - The trade-off between sbb+not vs. setne+neg could be addressed if that's a regression, but those would always be nearly equivalent. 4. pr22338.ll and sext-i1.ll - These tests have undef operands, so we don't actually care about these diffs. 5. sbb.ll - This shows a win for what is likely a common case: choose -1 or 0. 6. select.ll - There's another borderline case here: cmp+sbb+or vs. test+set+lea? Also, sbb+not vs. setae+neg shows up again. 7. select_const.ll - These are motivating cases for the enhancement; replace cmov with cheaper ops. Assembly differences between movzbl and xor to avoid a partial reg stall are caused later by the X86 Fixup SetCC pass. Differential Revision: https://reviews.llvm.org/D35340 llvm-svn: 310717
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll18
-rw-r--r--llvm/test/CodeGen/X86/memcmp-optsize.ll48
-rw-r--r--llvm/test/CodeGen/X86/memcmp.ll42
-rw-r--r--llvm/test/CodeGen/X86/merge-consecutive-stores.ll8
-rw-r--r--llvm/test/CodeGen/X86/mul-constant-result.ll30
-rw-r--r--llvm/test/CodeGen/X86/pr22338.ll20
-rw-r--r--llvm/test/CodeGen/X86/sbb.ll6
-rw-r--r--llvm/test/CodeGen/X86/select.ll83
-rw-r--r--llvm/test/CodeGen/X86/select_const.ll100
-rw-r--r--llvm/test/CodeGen/X86/sext-i1.ll16
10 files changed, 166 insertions, 205 deletions
diff --git a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
index 66d3f3108ec..cffefc2bee6 100644
--- a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
+++ b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
@@ -9,21 +9,19 @@
define i32 @main() nounwind {
; CHECK-LABEL: main:
; CHECK: # BB#0: # %entry
-; CHECK-NEXT: cmpq $0, {{.*}}(%rip)
-; CHECK-NEXT: movb $-106, %al
-; CHECK-NEXT: jne .LBB0_2
-; CHECK-NEXT: # BB#1: # %entry
; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: .LBB0_2: # %entry
+; CHECK-NEXT: cmpq {{.*}}(%rip), %rax
+; CHECK-NEXT: sbbl %eax, %eax
+; CHECK-NEXT: andl $150, %eax
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: jle .LBB0_3
-; CHECK-NEXT: # BB#4: # %if.then
+; CHECK-NEXT: jle .LBB0_1
+; CHECK-NEXT: # BB#2: # %if.then
; CHECK-NEXT: movl $1, {{.*}}(%rip)
; CHECK-NEXT: movl $1, %esi
-; CHECK-NEXT: jmp .LBB0_5
-; CHECK-NEXT: .LBB0_3: # %entry.if.end_crit_edge
+; CHECK-NEXT: jmp .LBB0_3
+; CHECK-NEXT: .LBB0_1: # %entry.if.end_crit_edge
; CHECK-NEXT: movl {{.*}}(%rip), %esi
-; CHECK-NEXT: .LBB0_5: # %if.end
+; CHECK-NEXT: .LBB0_3: # %if.end
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movl $.L.str, %edi
; CHECK-NEXT: xorl %eax, %eax
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll
index 80d5149af20..4489aaf6b2d 100644
--- a/llvm/test/CodeGen/X86/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll
@@ -125,12 +125,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB4_3
; X86-NEXT: .LBB4_1: # %res_block
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: incl %ecx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: decl %eax
-; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB4_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
@@ -149,9 +146,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB4_1: # %res_block
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
ret i32 %m
@@ -286,12 +283,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind optsize {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB9_3
; X86-NEXT: .LBB9_1: # %res_block
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: incl %ecx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: decl %eax
-; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB9_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
@@ -310,9 +304,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind optsize {
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_1: # %res_block
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
ret i32 %m
@@ -381,12 +375,10 @@ define i32 @length8(i8* %X, i8* %Y) nounwind optsize {
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB11_3
; X86-NEXT: .LBB11_1: # %res_block
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: incl %esi
; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: decl %eax
; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: cmovael %esi, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB11_3: # %endblock
; X86-NEXT: popl %esi
; X86-NEXT: retl
@@ -531,10 +523,10 @@ define i32 @length12(i8* %X, i8* %Y) nounwind optsize {
; X64-NEXT: # BB#3: # %endblock
; X64-NEXT: retq
; X64-NEXT: .LBB15_1: # %res_block
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
ret i32 %m
@@ -572,10 +564,10 @@ define i32 @length16(i8* %X, i8* %Y) nounwind optsize {
; X64-NEXT: # BB#3: # %endblock
; X64-NEXT: retq
; X64-NEXT: .LBB16_1: # %res_block
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
ret i32 %m
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
index 020a9c0c333..a269529b646 100644
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -126,9 +126,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
; X86-NEXT: popl %esi
; X86-NEXT: retl
; X86-NEXT: .LBB4_1: # %res_block
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -146,9 +146,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind {
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB4_1: # %res_block
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind
ret i32 %m
@@ -283,9 +283,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind {
; X86-NEXT: popl %esi
; X86-NEXT: retl
; X86-NEXT: .LBB9_1: # %res_block
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -303,9 +303,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind {
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq
; X64-NEXT: .LBB9_1: # %res_block
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
ret i32 %m
@@ -376,10 +376,10 @@ define i32 @length8(i8* %X, i8* %Y) nounwind {
; X86-NEXT: popl %esi
; X86-NEXT: retl
; X86-NEXT: .LBB11_1: # %res_block
+; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
-; X86-NEXT: movl $-1, %ecx
-; X86-NEXT: movl $1, %eax
-; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: setae %al
+; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
@@ -521,10 +521,10 @@ define i32 @length12(i8* %X, i8* %Y) nounwind {
; X64-NEXT: # BB#3: # %endblock
; X64-NEXT: retq
; X64-NEXT: .LBB15_1: # %res_block
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind
ret i32 %m
@@ -562,10 +562,10 @@ define i32 @length16(i8* %X, i8* %Y) nounwind {
; X64-NEXT: # BB#3: # %endblock
; X64-NEXT: retq
; X64-NEXT: .LBB16_1: # %res_block
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx
-; X64-NEXT: movl $-1, %ecx
-; X64-NEXT: movl $1, %eax
-; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: setae %al
+; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind
ret i32 %m
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-stores.ll b/llvm/test/CodeGen/X86/merge-consecutive-stores.ll
index 42652952989..8cb6f3ae1ee 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-stores.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-stores.ll
@@ -16,11 +16,9 @@ define i32 @foo (i64* %so) nounwind uwtable ssp {
; CHECK-NEXT: cmpl 16(%eax), %edx
; CHECK-NEXT: movl $0, 16(%eax)
; CHECK-NEXT: sbbl %ecx, %edx
-; CHECK-NEXT: movl $-1, %eax
-; CHECK-NEXT: jl .LBB0_2
-; CHECK-NEXT: # BB#1:
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: negl %eax
; CHECK-NEXT: retl
%used = getelementptr inbounds i64, i64* %so, i32 3
store i64 0, i64* %used, align 8
diff --git a/llvm/test/CodeGen/X86/mul-constant-result.ll b/llvm/test/CodeGen/X86/mul-constant-result.ll
index 65d80a699e2..83a9dbe4b24 100644
--- a/llvm/test/CodeGen/X86/mul-constant-result.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-result.ll
@@ -952,12 +952,11 @@ define i32 @foo() local_unnamed_addr #0 {
; X86-NEXT: .Lcfi103:
; X86-NEXT: .cfi_adjust_cfa_offset -8
; X86-NEXT: xorl $32, %eax
+; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: orl %ebx, %eax
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: jne .LBB1_2
-; X86-NEXT: # BB#1:
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: .LBB1_2:
+; X86-NEXT: setne %cl
+; X86-NEXT: negl %ecx
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -974,23 +973,20 @@ define i32 @foo() local_unnamed_addr #0 {
; X64-HSW-NEXT: pushq %r14
; X64-HSW-NEXT: .Lcfi2:
; X64-HSW-NEXT: .cfi_def_cfa_offset 32
-; X64-HSW-NEXT: pushq %r12
+; X64-HSW-NEXT: pushq %rbx
; X64-HSW-NEXT: .Lcfi3:
; X64-HSW-NEXT: .cfi_def_cfa_offset 40
-; X64-HSW-NEXT: pushq %rbx
+; X64-HSW-NEXT: pushq %rax
; X64-HSW-NEXT: .Lcfi4:
; X64-HSW-NEXT: .cfi_def_cfa_offset 48
; X64-HSW-NEXT: .Lcfi5:
-; X64-HSW-NEXT: .cfi_offset %rbx, -48
+; X64-HSW-NEXT: .cfi_offset %rbx, -40
; X64-HSW-NEXT: .Lcfi6:
-; X64-HSW-NEXT: .cfi_offset %r12, -40
-; X64-HSW-NEXT: .Lcfi7:
; X64-HSW-NEXT: .cfi_offset %r14, -32
-; X64-HSW-NEXT: .Lcfi8:
+; X64-HSW-NEXT: .Lcfi7:
; X64-HSW-NEXT: .cfi_offset %r15, -24
-; X64-HSW-NEXT: .Lcfi9:
+; X64-HSW-NEXT: .Lcfi8:
; X64-HSW-NEXT: .cfi_offset %rbp, -16
-; X64-HSW-NEXT: xorl %r12d, %r12d
; X64-HSW-NEXT: movl $1, %edi
; X64-HSW-NEXT: xorl %esi, %esi
; X64-HSW-NEXT: callq mult
@@ -1180,11 +1176,13 @@ define i32 @foo() local_unnamed_addr #0 {
; X64-HSW-NEXT: movl $16, %esi
; X64-HSW-NEXT: callq mult
; X64-HSW-NEXT: xorl $32, %eax
+; X64-HSW-NEXT: xorl %ecx, %ecx
; X64-HSW-NEXT: orl %ebx, %eax
-; X64-HSW-NEXT: movl $-1, %eax
-; X64-HSW-NEXT: cmovel %r12d, %eax
+; X64-HSW-NEXT: setne %cl
+; X64-HSW-NEXT: negl %ecx
+; X64-HSW-NEXT: movl %ecx, %eax
+; X64-HSW-NEXT: addq $8, %rsp
; X64-HSW-NEXT: popq %rbx
-; X64-HSW-NEXT: popq %r12
; X64-HSW-NEXT: popq %r14
; X64-HSW-NEXT: popq %r15
; X64-HSW-NEXT: popq %rbp
diff --git a/llvm/test/CodeGen/X86/pr22338.ll b/llvm/test/CodeGen/X86/pr22338.ll
index e0645d1ef55..41430f5af99 100644
--- a/llvm/test/CodeGen/X86/pr22338.ll
+++ b/llvm/test/CodeGen/X86/pr22338.ll
@@ -5,30 +5,28 @@
define i32 @fn() {
; X86-LABEL: fn:
; X86: # BB#0: # %entry
+; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $1, %eax
+; X86-NEXT: setne %al
; X86-NEXT: sete %cl
-; X86-NEXT: movl $-1, %eax
-; X86-NEXT: jne .LBB0_2
-; X86-NEXT: # BB#1: # %entry
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: .LBB0_2: # %entry
+; X86-NEXT: negl %eax
; X86-NEXT: addb %cl, %cl
; X86-NEXT: shll %cl, %eax
; X86-NEXT: .p2align 4, 0x90
-; X86-NEXT: .LBB0_3: # %bb1
+; X86-NEXT: .LBB0_1: # %bb1
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: testl %eax, %eax
-; X86-NEXT: je .LBB0_3
-; X86-NEXT: # BB#4: # %bb2
+; X86-NEXT: je .LBB0_1
+; X86-NEXT: # BB#2: # %bb2
; X86-NEXT: retl
;
; X64-LABEL: fn:
; X64: # BB#0: # %entry
-; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl $1, %eax
+; X64-NEXT: setne %al
; X64-NEXT: sete %cl
-; X64-NEXT: movl $-1, %eax
-; X64-NEXT: cmovel %edx, %eax
+; X64-NEXT: negl %eax
; X64-NEXT: addb %cl, %cl
; X64-NEXT: shll %cl, %eax
; X64-NEXT: .p2align 4, 0x90
diff --git a/llvm/test/CodeGen/X86/sbb.ll b/llvm/test/CodeGen/X86/sbb.ll
index b6e8ebf6ed0..7429c0777a4 100644
--- a/llvm/test/CodeGen/X86/sbb.ll
+++ b/llvm/test/CodeGen/X86/sbb.ll
@@ -130,10 +130,8 @@ define i32 @ult_select_neg1_or_0(i32 %x, i32 %y) nounwind {
define i32 @ugt_select_neg1_or_0(i32 %x, i32 %y) nounwind {
; CHECK-LABEL: ugt_select_neg1_or_0:
; CHECK: # BB#0:
-; CHECK-NEXT: xorl %ecx, %ecx
-; CHECK-NEXT: cmpl %edi, %esi
-; CHECK-NEXT: movl $-1, %eax
-; CHECK-NEXT: cmovbel %ecx, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: retq
%cmp = icmp ugt i32 %y, %x
%ext = sext i1 %cmp to i32
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index ec15d1a9520..b37644e2291 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -545,12 +545,11 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
;
; MCU-LABEL: test9b:
; MCU: # BB#0:
-; MCU-NEXT: orl %edx, %eax
-; MCU-NEXT: movl $-1, %edx
-; MCU-NEXT: je .LBB10_2
-; MCU-NEXT: # BB#1:
+; MCU-NEXT: movl %edx, %ecx
; MCU-NEXT: xorl %edx, %edx
-; MCU-NEXT: .LBB10_2:
+; MCU-NEXT: orl %ecx, %eax
+; MCU-NEXT: sete %dl
+; MCU-NEXT: negl %edx
; MCU-NEXT: movl {{[0-9]+}}(%esp), %eax
; MCU-NEXT: orl %edx, %eax
; MCU-NEXT: orl {{[0-9]+}}(%esp), %edx
@@ -563,23 +562,14 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
;; Select between -1 and 1.
define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
-; GENERIC-LABEL: test10:
-; GENERIC: ## BB#0:
-; GENERIC-NEXT: cmpq $1, %rdi
-; GENERIC-NEXT: sbbq %rax, %rax
-; GENERIC-NEXT: orq $1, %rax
-; GENERIC-NEXT: retq
-; GENERIC-NEXT: ## -- End function
-;
-; ATOM-LABEL: test10:
-; ATOM: ## BB#0:
-; ATOM-NEXT: cmpq $1, %rdi
-; ATOM-NEXT: sbbq %rax, %rax
-; ATOM-NEXT: orq $1, %rax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
-; ATOM-NEXT: ## -- End function
+; CHECK-LABEL: test10:
+; CHECK: ## BB#0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: leaq -1(%rax,%rax), %rax
+; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
;
; MCU-LABEL: test10:
; MCU: # BB#0:
@@ -747,29 +737,22 @@ define i32 @test13(i32 %a, i32 %b) nounwind {
}
define i32 @test14(i32 %a, i32 %b) nounwind {
-; GENERIC-LABEL: test14:
-; GENERIC: ## BB#0:
-; GENERIC-NEXT: cmpl %esi, %edi
-; GENERIC-NEXT: sbbl %eax, %eax
-; GENERIC-NEXT: notl %eax
-; GENERIC-NEXT: retq
-; GENERIC-NEXT: ## -- End function
-;
-; ATOM-LABEL: test14:
-; ATOM: ## BB#0:
-; ATOM-NEXT: cmpl %esi, %edi
-; ATOM-NEXT: sbbl %eax, %eax
-; ATOM-NEXT: notl %eax
-; ATOM-NEXT: nop
-; ATOM-NEXT: nop
-; ATOM-NEXT: retq
-; ATOM-NEXT: ## -- End function
+; CHECK-LABEL: test14:
+; CHECK: ## BB#0:
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: setae %al
+; CHECK-NEXT: negl %eax
+; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
;
; MCU-LABEL: test14:
; MCU: # BB#0:
+; MCU-NEXT: xorl %ecx, %ecx
; MCU-NEXT: cmpl %edx, %eax
-; MCU-NEXT: sbbl %eax, %eax
-; MCU-NEXT: notl %eax
+; MCU-NEXT: setae %cl
+; MCU-NEXT: negl %ecx
+; MCU-NEXT: movl %ecx, %eax
; MCU-NEXT: retl
%c = icmp uge i32 %a, %b
%d = sext i1 %c to i32
@@ -826,12 +809,11 @@ define i64 @test16(i64 %x) nounwind uwtable readnone ssp {
;
; MCU-LABEL: test16:
; MCU: # BB#0: # %entry
-; MCU-NEXT: orl %edx, %eax
-; MCU-NEXT: movl $-1, %eax
-; MCU-NEXT: jne .LBB18_2
-; MCU-NEXT: # BB#1: # %entry
+; MCU-NEXT: movl %eax, %ecx
; MCU-NEXT: xorl %eax, %eax
-; MCU-NEXT: .LBB18_2: # %entry
+; MCU-NEXT: orl %edx, %ecx
+; MCU-NEXT: setne %al
+; MCU-NEXT: negl %eax
; MCU-NEXT: movl %eax, %edx
; MCU-NEXT: retl
entry:
@@ -844,14 +826,16 @@ define i16 @test17(i16 %x) nounwind {
; GENERIC-LABEL: test17:
; GENERIC: ## BB#0: ## %entry
; GENERIC-NEXT: negw %di
-; GENERIC-NEXT: sbbw %ax, %ax
+; GENERIC-NEXT: sbbl %eax, %eax
+; GENERIC-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; GENERIC-NEXT: retq
; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test17:
; ATOM: ## BB#0: ## %entry
; ATOM-NEXT: negw %di
-; ATOM-NEXT: sbbw %ax, %ax
+; ATOM-NEXT: sbbl %eax, %eax
+; ATOM-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: nop
@@ -862,7 +846,8 @@ define i16 @test17(i16 %x) nounwind {
; MCU-LABEL: test17:
; MCU: # BB#0: # %entry
; MCU-NEXT: negw %ax
-; MCU-NEXT: sbbw %ax, %ax
+; MCU-NEXT: sbbl %eax, %eax
+; MCU-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; MCU-NEXT: retl
entry:
%cmp = icmp ne i16 %x, 0
diff --git a/llvm/test/CodeGen/X86/select_const.ll b/llvm/test/CodeGen/X86/select_const.ll
index b439d26b286..7d13cc7b788 100644
--- a/llvm/test/CodeGen/X86/select_const.ll
+++ b/llvm/test/CodeGen/X86/select_const.ll
@@ -211,10 +211,9 @@ define i32 @select_C_Cplus1_signext(i1 signext %cond) {
define i32 @select_lea_2(i1 zeroext %cond) {
; CHECK-LABEL: select_lea_2:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $-1, %ecx
-; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: leal -1(%rax,%rax), %eax
; CHECK-NEXT: retq
%sel = select i1 %cond, i32 -1, i32 1
ret i32 %sel
@@ -223,10 +222,9 @@ define i32 @select_lea_2(i1 zeroext %cond) {
define i64 @select_lea_3(i1 zeroext %cond) {
; CHECK-LABEL: select_lea_3:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $1, %ecx
-; CHECK-NEXT: movq $-2, %rax
-; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: leaq -2(%rax,%rax,2), %rax
; CHECK-NEXT: retq
%sel = select i1 %cond, i64 -2, i64 1
ret i64 %sel
@@ -235,10 +233,9 @@ define i64 @select_lea_3(i1 zeroext %cond) {
define i32 @select_lea_5(i1 zeroext %cond) {
; CHECK-LABEL: select_lea_5:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $-2, %ecx
-; CHECK-NEXT: movl $3, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: leal -2(%rax,%rax,4), %eax
; CHECK-NEXT: retq
%sel = select i1 %cond, i32 -2, i32 3
ret i32 %sel
@@ -247,10 +244,9 @@ define i32 @select_lea_5(i1 zeroext %cond) {
define i64 @select_lea_9(i1 zeroext %cond) {
; CHECK-LABEL: select_lea_9:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $2, %ecx
-; CHECK-NEXT: movq $-7, %rax
-; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: leaq -7(%rax,%rax,8), %rax
; CHECK-NEXT: retq
%sel = select i1 %cond, i64 -7, i64 2
ret i64 %sel
@@ -262,8 +258,8 @@ define i64 @sel_1_2(i64 %x, i64 %y) {
; CHECK-LABEL: sel_1_2:
; CHECK: # BB#0:
; CHECK-NEXT: cmpq $42, %rdi
-; CHECK-NEXT: sbbq %rax, %rax
-; CHECK-NEXT: leaq 2(%rax,%rsi), %rax
+; CHECK-NEXT: sbbq $0, %rsi
+; CHECK-NEXT: leaq 2(%rsi), %rax
; CHECK-NEXT: retq
%cmp = icmp ult i64 %x, 42
%sel = select i1 %cmp, i64 1, i64 2
@@ -271,30 +267,31 @@ define i64 @sel_1_2(i64 %x, i64 %y) {
ret i64 %sub
}
-; No LEA with 8-bit or 16-bit, but this shouldn't need branches or cmov.
+; No LEA with 8-bit, but this shouldn't need branches or cmov.
define i8 @sel_1_neg1(i32 %x) {
; CHECK-LABEL: sel_1_neg1:
; CHECK: # BB#0:
; CHECK-NEXT: cmpl $42, %edi
-; CHECK-NEXT: movb $3, %al
-; CHECK-NEXT: jg .LBB23_2
-; CHECK-NEXT: # BB#1:
-; CHECK-NEXT: movb $-1, %al
-; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: setg %al
+; CHECK-NEXT: shlb $2, %al
+; CHECK-NEXT: decb %al
; CHECK-NEXT: retq
%cmp = icmp sgt i32 %x, 42
%sel = select i1 %cmp, i8 3, i8 -1
ret i8 %sel
}
+; We get an LEA for 16-bit because we ignore the high-bits.
+
define i16 @sel_neg1_1(i32 %x) {
; CHECK-LABEL: sel_neg1_1:
; CHECK: # BB#0:
-; CHECK-NEXT: cmpl $42, %edi
-; CHECK-NEXT: movw $-1, %cx
-; CHECK-NEXT: movw $3, %ax
-; CHECK-NEXT: cmovgw %cx, %ax
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl $43, %edi
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: leal -1(,%rax,4), %eax
+; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%cmp = icmp sgt i32 %x, 42
%sel = select i1 %cmp, i16 -1, i16 3
@@ -306,10 +303,10 @@ define i16 @sel_neg1_1(i32 %x) {
define i32 @sel_1_neg1_32(i32 %x) {
; CHECK-LABEL: sel_1_neg1_32:
; CHECK: # BB#0:
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl $42, %edi
-; CHECK-NEXT: movl $8, %ecx
-; CHECK-NEXT: movl $-1, %eax
-; CHECK-NEXT: cmovgl %ecx, %eax
+; CHECK-NEXT: setg %al
+; CHECK-NEXT: leal -1(%rax,%rax,8), %eax
; CHECK-NEXT: retq
%cmp = icmp sgt i32 %x, 42
%sel = select i1 %cmp, i32 8, i32 -1
@@ -319,10 +316,10 @@ define i32 @sel_1_neg1_32(i32 %x) {
define i32 @sel_neg1_1_32(i32 %x) {
; CHECK-LABEL: sel_neg1_1_32:
; CHECK: # BB#0:
-; CHECK-NEXT: cmpl $42, %edi
-; CHECK-NEXT: movl $-7, %ecx
-; CHECK-NEXT: movl $2, %eax
-; CHECK-NEXT: cmovgl %ecx, %eax
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: cmpl $43, %edi
+; CHECK-NEXT: setl %al
+; CHECK-NEXT: leal -7(%rax,%rax,8), %eax
; CHECK-NEXT: retq
%cmp = icmp sgt i32 %x, 42
%sel = select i1 %cmp, i32 -7, i32 2
@@ -336,12 +333,9 @@ define i32 @sel_neg1_1_32(i32 %x) {
define i8 @select_pow2_diff(i1 zeroext %cond) {
; CHECK-LABEL: select_pow2_diff:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movb $19, %al
-; CHECK-NEXT: jne .LBB27_2
-; CHECK-NEXT: # BB#1:
-; CHECK-NEXT: movb $3, %al
-; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: shlb $4, %dil
+; CHECK-NEXT: orb $3, %dil
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%sel = select i1 %cond, i8 19, i8 3
ret i8 %sel
@@ -350,10 +344,11 @@ define i8 @select_pow2_diff(i1 zeroext %cond) {
define i16 @select_pow2_diff_invert(i1 zeroext %cond) {
; CHECK-LABEL: select_pow2_diff_invert:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movw $7, %cx
-; CHECK-NEXT: movw $71, %ax
-; CHECK-NEXT: cmovnew %cx, %ax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: shll $6, %eax
+; CHECK-NEXT: orl $7, %eax
+; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; CHECK-NEXT: retq
%sel = select i1 %cond, i16 7, i16 71
ret i16 %sel
@@ -362,10 +357,9 @@ define i16 @select_pow2_diff_invert(i1 zeroext %cond) {
define i32 @select_pow2_diff_neg(i1 zeroext %cond) {
; CHECK-LABEL: select_pow2_diff_neg:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $-9, %ecx
-; CHECK-NEXT: movl $-25, %eax
-; CHECK-NEXT: cmovnel %ecx, %eax
+; CHECK-NEXT: shlb $4, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: orl $-25, %eax
; CHECK-NEXT: retq
%sel = select i1 %cond, i32 -9, i32 -25
ret i32 %sel
@@ -374,10 +368,10 @@ define i32 @select_pow2_diff_neg(i1 zeroext %cond) {
define i64 @select_pow2_diff_neg_invert(i1 zeroext %cond) {
; CHECK-LABEL: select_pow2_diff_neg_invert:
; CHECK: # BB#0:
-; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: movl $29, %ecx
-; CHECK-NEXT: movq $-99, %rax
-; CHECK-NEXT: cmoveq %rcx, %rax
+; CHECK-NEXT: xorb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: shlq $7, %rax
+; CHECK-NEXT: addq $-99, %rax
; CHECK-NEXT: retq
%sel = select i1 %cond, i64 -99, i64 29
ret i64 %sel
diff --git a/llvm/test/CodeGen/X86/sext-i1.ll b/llvm/test/CodeGen/X86/sext-i1.ll
index 8c92434db21..d159fe17422 100644
--- a/llvm/test/CodeGen/X86/sext-i1.ll
+++ b/llvm/test/CodeGen/X86/sext-i1.ll
@@ -51,8 +51,10 @@ define i32 @t3() nounwind readonly {
;
; X64-LABEL: t3:
; X64: # BB#0: # %entry
-; X64-NEXT: cmpl $1, %eax
-; X64-NEXT: sbbq %rax, %rax
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testl %eax, %eax
+; X64-NEXT: sete %al
+; X64-NEXT: negq %rax
; X64-NEXT: cmpq %rax, %rax
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
@@ -75,13 +77,11 @@ if.end:
define i32 @t4(i64 %x) nounwind readnone ssp {
; X32-LABEL: t4:
; X32: # BB#0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl $-1, %eax
-; X32-NEXT: je .LBB3_2
-; X32-NEXT: # BB#1:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: .LBB3_2:
+; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: sete %al
+; X32-NEXT: negl %eax
; X32-NEXT: retl
;
; X64-LABEL: t4:
OpenPOWER on IntegriCloud