diff options
| author | Chandler Carruth <chandlerc@gmail.com> | 2017-08-25 00:34:07 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2017-08-25 00:34:07 +0000 |
| commit | 8ac488b16185a70f1f2589a3d0447ddedb7f73e3 (patch) | |
| tree | 303bd13cf721713abab0d966b76905c377decbc6 /llvm/test/CodeGen/X86/atomic-minmax-i6432.ll | |
| parent | ea65b5aa498c42b7dc8bf44fa418ed9725871164 (diff) | |
| download | bcm5719-llvm-8ac488b16185a70f1f2589a3d0447ddedb7f73e3.tar.gz bcm5719-llvm-8ac488b16185a70f1f2589a3d0447ddedb7f73e3.zip | |
[x86] Fix an amazing goof in the handling of sub, or, and xor lowering.
The comment for this code indicated that it should work similar to our
handling of add lowering above: if we see uses of an instruction other
than flag usage and store usage, it tries to avoid the specialized
X86ISD::* nodes that are designed for flag+op modeling and emits an
explicit test.
Problem is, only the add case actually did this. In all the other cases,
the logic was incomplete and inverted. Any time the value was used by
a store, we bailed on the specialized X86ISD node. All of this appears
to have been historical where we had different logic here. =/
Turns out, we have quite a few patterns designed around these nodes. We
should actually form them. I fixed the code to match what we do for add,
and it has quite a positive effect just within some of our test cases.
The only thing close to a regression I see is using:
notl %r
testl %r, %r
instead of:
xorl $-1, %r
But we can add a pattern or something to fold that back out. The
improvements seem more than worth this.
I've also worked with Craig to update the comments to no longer be
actively contradicted by the code. =[ Some of this still remains
a mystery to both Craig and myself, but this seems like a large step in
the direction of consistency and slightly more accurate comments.
Many thanks to Craig for help figuring out this nasty stuff.
Differential Revision: https://reviews.llvm.org/D37096
llvm-svn: 311737
Diffstat (limited to 'llvm/test/CodeGen/X86/atomic-minmax-i6432.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/atomic-minmax-i6432.ll | 176 |
1 files changed, 36 insertions, 140 deletions
diff --git a/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll b/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll index 565ebb59068..9a1b8d38cbe 100644 --- a/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll +++ b/llvm/test/CodeGen/X86/atomic-minmax-i6432.ll @@ -15,29 +15,16 @@ define i64 @atomic_max_i64() nounwind { ; LINUX-NEXT: .p2align 4, 0x90 ; LINUX-NEXT: .LBB0_1: # %atomicrmw.start ; LINUX-NEXT: # =>This Inner Loop Header: Depth=1 -; LINUX-NEXT: xorl %ecx, %ecx ; LINUX-NEXT: cmpl %eax, %esi +; LINUX-NEXT: movl $0, %ecx ; LINUX-NEXT: sbbl %edx, %ecx -; LINUX-NEXT: setl %cl -; LINUX-NEXT: andb $1, %cl -; LINUX-NEXT: movl %eax, %ebx -; LINUX-NEXT: jne .LBB0_3 -; LINUX-NEXT: # BB#2: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB0_1 Depth=1 +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: cmovll %edx, %ecx ; LINUX-NEXT: movl $5, %ebx -; LINUX-NEXT: .LBB0_3: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB0_1 Depth=1 -; LINUX-NEXT: testb %cl, %cl -; LINUX-NEXT: movl %edx, %ecx -; LINUX-NEXT: jne .LBB0_5 -; LINUX-NEXT: # BB#4: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB0_1 Depth=1 -; LINUX-NEXT: xorl %ecx, %ecx -; LINUX-NEXT: .LBB0_5: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB0_1 Depth=1 +; LINUX-NEXT: cmovll %eax, %ebx ; LINUX-NEXT: lock cmpxchg8b sc64 ; LINUX-NEXT: jne .LBB0_1 -; LINUX-NEXT: # BB#6: # %atomicrmw.end +; LINUX-NEXT: # BB#2: # %atomicrmw.end ; LINUX-NEXT: popl %esi ; LINUX-NEXT: popl %ebx ; LINUX-NEXT: retl @@ -57,29 +44,16 @@ define i64 @atomic_max_i64() nounwind { ; PIC-NEXT: .p2align 4, 0x90 ; PIC-NEXT: LBB0_1: ## %atomicrmw.start ; PIC-NEXT: ## =>This Inner Loop Header: Depth=1 -; PIC-NEXT: xorl %ecx, %ecx ; PIC-NEXT: cmpl %eax, %edi +; PIC-NEXT: movl $0, %ecx ; PIC-NEXT: sbbl %edx, %ecx -; PIC-NEXT: setl %cl -; PIC-NEXT: andb $1, %cl -; PIC-NEXT: movl %eax, %ebx -; PIC-NEXT: jne LBB0_3 -; PIC-NEXT: ## BB#2: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB0_1 Depth=1 +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: cmovll %edx, 
%ecx ; PIC-NEXT: movl $5, %ebx -; PIC-NEXT: LBB0_3: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; PIC-NEXT: testb %cl, %cl -; PIC-NEXT: movl %edx, %ecx -; PIC-NEXT: jne LBB0_5 -; PIC-NEXT: ## BB#4: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; PIC-NEXT: xorl %ecx, %ecx -; PIC-NEXT: LBB0_5: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB0_1 Depth=1 +; PIC-NEXT: cmovll %eax, %ebx ; PIC-NEXT: lock cmpxchg8b (%esi) ; PIC-NEXT: jne LBB0_1 -; PIC-NEXT: ## BB#6: ## %atomicrmw.end +; PIC-NEXT: ## BB#2: ## %atomicrmw.end ; PIC-NEXT: popl %esi ; PIC-NEXT: popl %edi ; PIC-NEXT: popl %ebx @@ -102,26 +76,13 @@ define i64 @atomic_min_i64() nounwind { ; LINUX-NEXT: cmpl $7, %eax ; LINUX-NEXT: movl %edx, %ecx ; LINUX-NEXT: sbbl $0, %ecx -; LINUX-NEXT: setl %cl -; LINUX-NEXT: andb $1, %cl -; LINUX-NEXT: movl %eax, %ebx -; LINUX-NEXT: jne .LBB1_3 -; LINUX-NEXT: # BB#2: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: cmovll %edx, %ecx ; LINUX-NEXT: movl $6, %ebx -; LINUX-NEXT: .LBB1_3: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB1_1 Depth=1 -; LINUX-NEXT: testb %cl, %cl -; LINUX-NEXT: movl %edx, %ecx -; LINUX-NEXT: jne .LBB1_5 -; LINUX-NEXT: # BB#4: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB1_1 Depth=1 -; LINUX-NEXT: xorl %ecx, %ecx -; LINUX-NEXT: .LBB1_5: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB1_1 Depth=1 +; LINUX-NEXT: cmovll %eax, %ebx ; LINUX-NEXT: lock cmpxchg8b sc64 ; LINUX-NEXT: jne .LBB1_1 -; LINUX-NEXT: # BB#6: # %atomicrmw.end +; LINUX-NEXT: # BB#2: # %atomicrmw.end ; LINUX-NEXT: popl %ebx ; LINUX-NEXT: retl ; @@ -141,26 +102,13 @@ define i64 @atomic_min_i64() nounwind { ; PIC-NEXT: cmpl $7, %eax ; PIC-NEXT: movl %edx, %ecx ; PIC-NEXT: sbbl $0, %ecx -; PIC-NEXT: setl %cl -; PIC-NEXT: andb $1, %cl -; PIC-NEXT: movl %eax, %ebx -; PIC-NEXT: jne LBB1_3 -; PIC-NEXT: ## BB#2: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: 
Header=BB1_1 Depth=1 +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: cmovll %edx, %ecx ; PIC-NEXT: movl $6, %ebx -; PIC-NEXT: LBB1_3: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB1_1 Depth=1 -; PIC-NEXT: testb %cl, %cl -; PIC-NEXT: movl %edx, %ecx -; PIC-NEXT: jne LBB1_5 -; PIC-NEXT: ## BB#4: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB1_1 Depth=1 -; PIC-NEXT: xorl %ecx, %ecx -; PIC-NEXT: LBB1_5: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB1_1 Depth=1 +; PIC-NEXT: cmovll %eax, %ebx ; PIC-NEXT: lock cmpxchg8b (%esi) ; PIC-NEXT: jne LBB1_1 -; PIC-NEXT: ## BB#6: ## %atomicrmw.end +; PIC-NEXT: ## BB#2: ## %atomicrmw.end ; PIC-NEXT: popl %esi ; PIC-NEXT: popl %ebx ; PIC-NEXT: retl @@ -181,29 +129,16 @@ define i64 @atomic_umax_i64() nounwind { ; LINUX-NEXT: .p2align 4, 0x90 ; LINUX-NEXT: .LBB2_1: # %atomicrmw.start ; LINUX-NEXT: # =>This Inner Loop Header: Depth=1 -; LINUX-NEXT: xorl %ecx, %ecx ; LINUX-NEXT: cmpl %eax, %esi +; LINUX-NEXT: movl $0, %ecx ; LINUX-NEXT: sbbl %edx, %ecx -; LINUX-NEXT: setb %cl -; LINUX-NEXT: andb $1, %cl -; LINUX-NEXT: movl %eax, %ebx -; LINUX-NEXT: jne .LBB2_3 -; LINUX-NEXT: # BB#2: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: cmovbl %edx, %ecx ; LINUX-NEXT: movl $7, %ebx -; LINUX-NEXT: .LBB2_3: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB2_1 Depth=1 -; LINUX-NEXT: testb %cl, %cl -; LINUX-NEXT: movl %edx, %ecx -; LINUX-NEXT: jne .LBB2_5 -; LINUX-NEXT: # BB#4: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB2_1 Depth=1 -; LINUX-NEXT: xorl %ecx, %ecx -; LINUX-NEXT: .LBB2_5: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB2_1 Depth=1 +; LINUX-NEXT: cmovbl %eax, %ebx ; LINUX-NEXT: lock cmpxchg8b sc64 ; LINUX-NEXT: jne .LBB2_1 -; LINUX-NEXT: # BB#6: # %atomicrmw.end +; LINUX-NEXT: # BB#2: # %atomicrmw.end ; LINUX-NEXT: popl %esi ; LINUX-NEXT: popl %ebx ; LINUX-NEXT: retl @@ -223,29 +158,16 @@ define i64 @atomic_umax_i64() nounwind { ; 
PIC-NEXT: .p2align 4, 0x90 ; PIC-NEXT: LBB2_1: ## %atomicrmw.start ; PIC-NEXT: ## =>This Inner Loop Header: Depth=1 -; PIC-NEXT: xorl %ecx, %ecx ; PIC-NEXT: cmpl %eax, %edi +; PIC-NEXT: movl $0, %ecx ; PIC-NEXT: sbbl %edx, %ecx -; PIC-NEXT: setb %cl -; PIC-NEXT: andb $1, %cl -; PIC-NEXT: movl %eax, %ebx -; PIC-NEXT: jne LBB2_3 -; PIC-NEXT: ## BB#2: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB2_1 Depth=1 +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: cmovbl %edx, %ecx ; PIC-NEXT: movl $7, %ebx -; PIC-NEXT: LBB2_3: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB2_1 Depth=1 -; PIC-NEXT: testb %cl, %cl -; PIC-NEXT: movl %edx, %ecx -; PIC-NEXT: jne LBB2_5 -; PIC-NEXT: ## BB#4: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB2_1 Depth=1 -; PIC-NEXT: xorl %ecx, %ecx -; PIC-NEXT: LBB2_5: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB2_1 Depth=1 +; PIC-NEXT: cmovbl %eax, %ebx ; PIC-NEXT: lock cmpxchg8b (%esi) ; PIC-NEXT: jne LBB2_1 -; PIC-NEXT: ## BB#6: ## %atomicrmw.end +; PIC-NEXT: ## BB#2: ## %atomicrmw.end ; PIC-NEXT: popl %esi ; PIC-NEXT: popl %edi ; PIC-NEXT: popl %ebx @@ -268,26 +190,13 @@ define i64 @atomic_umin_i64() nounwind { ; LINUX-NEXT: cmpl $9, %eax ; LINUX-NEXT: movl %edx, %ecx ; LINUX-NEXT: sbbl $0, %ecx -; LINUX-NEXT: setb %cl -; LINUX-NEXT: andb $1, %cl -; LINUX-NEXT: movl %eax, %ebx -; LINUX-NEXT: jne .LBB3_3 -; LINUX-NEXT: # BB#2: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LINUX-NEXT: movl $0, %ecx +; LINUX-NEXT: cmovbl %edx, %ecx ; LINUX-NEXT: movl $8, %ebx -; LINUX-NEXT: .LBB3_3: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB3_1 Depth=1 -; LINUX-NEXT: testb %cl, %cl -; LINUX-NEXT: movl %edx, %ecx -; LINUX-NEXT: jne .LBB3_5 -; LINUX-NEXT: # BB#4: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB3_1 Depth=1 -; LINUX-NEXT: xorl %ecx, %ecx -; LINUX-NEXT: .LBB3_5: # %atomicrmw.start -; LINUX-NEXT: # in Loop: Header=BB3_1 Depth=1 +; LINUX-NEXT: cmovbl %eax, %ebx ; LINUX-NEXT: lock cmpxchg8b sc64 
; LINUX-NEXT: jne .LBB3_1 -; LINUX-NEXT: # BB#6: # %atomicrmw.end +; LINUX-NEXT: # BB#2: # %atomicrmw.end ; LINUX-NEXT: popl %ebx ; LINUX-NEXT: retl ; @@ -307,26 +216,13 @@ define i64 @atomic_umin_i64() nounwind { ; PIC-NEXT: cmpl $9, %eax ; PIC-NEXT: movl %edx, %ecx ; PIC-NEXT: sbbl $0, %ecx -; PIC-NEXT: setb %cl -; PIC-NEXT: andb $1, %cl -; PIC-NEXT: movl %eax, %ebx -; PIC-NEXT: jne LBB3_3 -; PIC-NEXT: ## BB#2: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB3_1 Depth=1 +; PIC-NEXT: movl $0, %ecx +; PIC-NEXT: cmovbl %edx, %ecx ; PIC-NEXT: movl $8, %ebx -; PIC-NEXT: LBB3_3: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB3_1 Depth=1 -; PIC-NEXT: testb %cl, %cl -; PIC-NEXT: movl %edx, %ecx -; PIC-NEXT: jne LBB3_5 -; PIC-NEXT: ## BB#4: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB3_1 Depth=1 -; PIC-NEXT: xorl %ecx, %ecx -; PIC-NEXT: LBB3_5: ## %atomicrmw.start -; PIC-NEXT: ## in Loop: Header=BB3_1 Depth=1 +; PIC-NEXT: cmovbl %eax, %ebx ; PIC-NEXT: lock cmpxchg8b (%esi) ; PIC-NEXT: jne LBB3_1 -; PIC-NEXT: ## BB#6: ## %atomicrmw.end +; PIC-NEXT: ## BB#2: ## %atomicrmw.end ; PIC-NEXT: popl %esi ; PIC-NEXT: popl %ebx ; PIC-NEXT: retl |

