author    Chandler Carruth <chandlerc@gmail.com>  2018-04-18 15:13:16 +0000
committer Chandler Carruth <chandlerc@gmail.com>  2018-04-18 15:13:16 +0000
commit    1f87618f8f45ae470a728c7ea2faeab40f6b3cc5 (patch)
tree      470b45cc3fc0197f4dc95609e3bf6e1fca8f178f /llvm/test/CodeGen/X86
parent    656444bf339a15d049e7b2f421cc4a1f16fd10ec (diff)
[x86] Fix PR37100 by teaching the EFLAGS copy lowering to rewrite uses
across basic blocks in the limited cases where it is very straightforward
to do so.

This will also be useful for other places where we do some limited EFLAGS
propagation across CFG edges and need to handle copy rewrites afterward.

I think this is rapidly approaching the maximum we can and should be doing
here. Everything else begins to require either heroic analysis to prove how
to do PHI insertion manually, or somehow managing arbitrary PHI-ing of
EFLAGS with general PHI insertion. Neither of these seems at all promising,
so if those cases come up, we'll almost certainly need to rewrite the parts
of LLVM that produce those patterns.

We do now require dominator trees in order to reliably diagnose patterns
that would require PHI nodes. This is a bit unfortunate, but it seems better
than the completely mysterious crash we would get otherwise.

Differential Revision: https://reviews.llvm.org/D45673

llvm-svn: 330264
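The "limited cases" above are those where no PHI node would be needed. Below
is a minimal sketch of the dominance test such a cross-block rewrite has to
pass, written against the MachineDominatorTree API of that era; the helper
name and structure are illustrative only, not code quoted from
X86FlagsCopyLowering.cpp, since this page shows only the test-suite half of
the patch:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Illustrative helper: a use of the recreated EFLAGS value in another basic
// block can be rewritten only when the block that recomputes the flags
// dominates the use block, so the value is available on every path into it.
// Anything else would need PHI insertion, which the pass does not attempt;
// the dominator tree lets it diagnose such patterns instead of crashing.
static bool canRewriteCrossBlockUse(const MachineInstr &FlagDef,
                                    const MachineInstr &Use,
                                    const MachineDominatorTree &MDT) {
  const MachineBasicBlock *DefMBB = FlagDef.getParent();
  const MachineBasicBlock *UseMBB = Use.getParent();
  if (DefMBB == UseMBB)
    return true; // Same-block rewrites were already handled.
  return MDT.dominates(DefMBB, UseMBB);
}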
Diffstat (limited to 'llvm/test/CodeGen/X86')
-rw-r--r--  llvm/test/CodeGen/X86/O0-pipeline.ll |   1 +
-rw-r--r--  llvm/test/CodeGen/X86/O3-pipeline.ll |   1 +
-rw-r--r--  llvm/test/CodeGen/X86/copy-eflags.ll | 108 ++++++++
3 files changed, 110 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index b79fdef2ff1..11062115a7f 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -37,6 +37,7 @@
; CHECK-NEXT: X86 PIC Global Base Reg Initialization
; CHECK-NEXT: Expand ISel Pseudo-instructions
; CHECK-NEXT: Local Stack Slot Allocation
+; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: X86 EFLAGS copy lowering
; CHECK-NEXT: X86 WinAlloca Expander
; CHECK-NEXT: Eliminate PHI nodes for register allocation
diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll
index 01deba7eed1..8511474d8f4 100644
--- a/llvm/test/CodeGen/X86/O3-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O3-pipeline.ll
@@ -90,6 +90,7 @@
; CHECK-NEXT: X86 LEA Optimize
; CHECK-NEXT: X86 Optimize Call Frame
; CHECK-NEXT: X86 Avoid Store Forwarding Block
+; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: X86 EFLAGS copy lowering
; CHECK-NEXT: X86 WinAlloca Expander
; CHECK-NEXT: Detect Dead Lanes
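The new "MachineDominator Tree Construction" line in both pipeline tests
above is the pass manager scheduling the analysis that the lowering pass now
requires. In the legacy pass manager that dependency is declared in
getAnalysisUsage; the skeleton below shows the usual idiom under that
assumption (the class name here is hypothetical, and the real pass lives in
X86FlagsCopyLowering.cpp, whose changes are not part of this diff):

#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;

namespace {
// Minimal skeleton of a machine function pass that requires the machine
// dominator tree analysis.
class ExampleFlagsPass : public MachineFunctionPass {
public:
  static char ID;
  ExampleFlagsPass() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // This is what schedules "MachineDominator Tree Construction" ahead of
    // the pass in the printed pipeline.
    AU.addRequired<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
    (void)MDT; // The real pass uses MDT to vet cross-block rewrites.
    return false;
  }
};
} // end anonymous namespace

char ExampleFlagsPass::ID = 0;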
diff --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll
index 07d13fe2e09..9844799ac1e 100644
--- a/llvm/test/CodeGen/X86/copy-eflags.ll
+++ b/llvm/test/CodeGen/X86/copy-eflags.ll
@@ -196,3 +196,111 @@ else:
tail call void @external_b()
ret void
}
+
+; Test a function that gets special select lowering into CFG with copied EFLAGS
+; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
+; cross-block rewrites in at least some narrow cases.
+define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
+; X32-LABEL: PR37100:
+; X32: # %bb.0: # %bb
+; X32-NEXT: pushl %ebp
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: pushl %ebx
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: pushl %edi
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 20
+; X32-NEXT: .cfi_offset %esi, -20
+; X32-NEXT: .cfi_offset %edi, -16
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
+; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: jmp .LBB3_1
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB3_5: # %bb1
+; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: xorl %edx, %edx
+; X32-NEXT: idivl %ebp
+; X32-NEXT: .LBB3_1: # %bb1
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: movsbl %cl, %eax
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: sarl $31, %edx
+; X32-NEXT: cmpl %eax, %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: sbbl %edx, %eax
+; X32-NEXT: setl %al
+; X32-NEXT: setl %dl
+; X32-NEXT: movzbl %dl, %ebp
+; X32-NEXT: negl %ebp
+; X32-NEXT: testb $-1, %al
+; X32-NEXT: jne .LBB3_3
+; X32-NEXT: # %bb.2: # %bb1
+; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT: movb %ch, %cl
+; X32-NEXT: .LBB3_3: # %bb1
+; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT: movb %cl, (%ebx)
+; X32-NEXT: movl (%edi), %edx
+; X32-NEXT: testb $-1, %al
+; X32-NEXT: jne .LBB3_5
+; X32-NEXT: # %bb.4: # %bb1
+; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT: movl %edx, %ebp
+; X32-NEXT: jmp .LBB3_5
+;
+; X64-LABEL: PR37100:
+; X64: # %bb.0: # %bb
+; X64-NEXT: movq %rdx, %r10
+; X64-NEXT: jmp .LBB3_1
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB3_5: # %bb1
+; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: idivl %esi
+; X64-NEXT: .LBB3_1: # %bb1
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: movsbq %dil, %rax
+; X64-NEXT: xorl %esi, %esi
+; X64-NEXT: cmpq %rax, %r10
+; X64-NEXT: setl %sil
+; X64-NEXT: negl %esi
+; X64-NEXT: cmpq %rax, %r10
+; X64-NEXT: jl .LBB3_3
+; X64-NEXT: # %bb.2: # %bb1
+; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT: movl %ecx, %edi
+; X64-NEXT: .LBB3_3: # %bb1
+; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT: movb %dil, (%r8)
+; X64-NEXT: jl .LBB3_5
+; X64-NEXT: # %bb.4: # %bb1
+; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT: movl (%r9), %esi
+; X64-NEXT: jmp .LBB3_5
+bb:
+ br label %bb1
+
+bb1:
+ %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
+ %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
+ %tmp3 = icmp sgt i16 %tmp2, 7
+ %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
+ %tmp5 = sext i8 %tmp to i64
+ %tmp6 = icmp slt i64 %arg3, %tmp5
+ %tmp7 = sext i1 %tmp6 to i32
+ %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
+ store volatile i8 %tmp8, i8* %ptr1
+ %tmp9 = load volatile i32, i32* %ptr2
+ %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
+ %tmp11 = srem i32 0, %tmp10
+ %tmp12 = trunc i32 %tmp11 to i16
+ br label %bb1
+}