| author | Chandler Carruth <chandlerc@gmail.com> | 2018-04-18 15:13:16 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2018-04-18 15:13:16 +0000 |
| commit | 1f87618f8f45ae470a728c7ea2faeab40f6b3cc5 | |
| tree | 470b45cc3fc0197f4dc95609e3bf6e1fca8f178f /llvm/test/CodeGen/X86 | |
| parent | 656444bf339a15d049e7b2f421cc4a1f16fd10ec | |
[x86] Fix PR37100 by teaching the EFLAGS copy lowering to rewrite uses
across basic blocks in the limited cases where it is very straightforward
to do so.
This will also be useful for other places where we do some limited
EFLAGS propagation across CFG edges and need to handle copy rewrites
afterward. I think this is rapidly approaching the maximum we can and
should be doing here. Everything else begins to require either heroic
analysis to prove how to do PHI insertion manually, or somehow managing
arbitrary PHI-ing of EFLAGS with general PHI insertion. Neither of these
seems at all promising, so if those cases come up, we'll almost certainly
need to rewrite the parts of LLVM that produce those patterns.
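To make the shape of that cross-block rewrite concrete, here is a minimal
sketch of the dominance gate it implies. This is not the actual code of the
"X86 EFLAGS copy lowering" pass: `rewriteUse` is a hypothetical stand-in for
the per-instruction rewrite logic, and only the gating condition (a use
outside the defining block is rewritten only when the def's block dominates
it; anything else is rejected loudly rather than miscompiled) reflects the
approach described above.

```cpp
// Hedged sketch: dominance-gated rewriting of the uses of an EFLAGS copy.
// `rewriteUse` is a hypothetical helper, not an LLVM API; only the gating
// condition mirrors the behavior described in the commit message.
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

// Hypothetical per-use rewrite: re-materialize the needed condition from a
// setCC-style scratch register instead of reading the copied EFLAGS.
// Elided in this sketch.
static void rewriteUse(MachineInstr &UseMI) { (void)UseMI; }

static void rewriteCopyUses(MachineInstr &CopyI, MachineRegisterInfo &MRI,
                            const MachineDominatorTree &MDT) {
  MachineBasicBlock *DefMBB = CopyI.getParent();
  Register FlagsReg = CopyI.getOperand(0).getReg();

  // Uses in the defining block, or in blocks the def dominates, are the
  // "very straightforward" cases: every path to the use sees this def.
  for (MachineInstr &UseMI :
       make_early_inc_range(MRI.use_instructions(FlagsReg))) {
    MachineBasicBlock *UseMBB = UseMI.getParent();
    if (UseMBB != DefMBB && !MDT.dominates(DefMBB, UseMBB))
      // Anything else would need a PHI of EFLAGS; diagnose it reliably
      // instead of crashing mysteriously later.
      report_fatal_error("EFLAGS copy used in a block not dominated by "
                         "its def; PHI insertion would be required");
    rewriteUse(UseMI);
  }
}
```

The key design point is that dominance keeps the rewrite purely local: no
PHI of EFLAGS ever needs to be synthesized, and anything outside that
envelope is rejected up front.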
We do now require dominator trees in order to reliably diagnose patterns
that would require PHI nodes. This is a bit unfortunate, but it seems
better than the completely mysterious crash we would get otherwise.
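The new "MachineDominator Tree Construction" entries in the pipeline tests
below fall out of how the legacy pass manager schedules analyses: a pass
that declares the dominator tree as required in `getAnalysisUsage` gets it
built immediately before it runs. A minimal sketch of that declaration
follows; the pass name is illustrative, and it uses the
`MachineDominatorTree` analysis class as it existed around this commit
(newer LLVM wraps it in `MachineDominatorTreeWrapperPass`).

```cpp
// Sketch of declaring a machine dominator tree dependency in the legacy
// pass manager. The pass name is illustrative; requiring the analysis is
// what inserts "MachineDominator Tree Construction" into the pipeline.
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

using namespace llvm;

namespace {
class FlagsCopyLoweringSketch : public MachineFunctionPass {
public:
  static char ID;
  FlagsCopyLoweringSketch() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Ask the pass manager to build (or reuse) the dominator tree first.
    AU.addRequired<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
    (void)MDT; // The real pass would feed this into its use rewriting.
    return false; // Sketch only: makes no changes.
  }
};
} // end anonymous namespace

char FlagsCopyLoweringSketch::ID = 0;
```

With the analysis declared as required rather than computed ad hoc, the O0
and O3 pipeline dumps both show the tree construction scheduled directly
before "X86 EFLAGS copy lowering", which is exactly what the two pipeline
test updates check.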
Differential Revision: https://reviews.llvm.org/D45673
llvm-svn: 330264
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/O0-pipeline.ll | 1 |
| -rw-r--r-- | llvm/test/CodeGen/X86/O3-pipeline.ll | 1 |
| -rw-r--r-- | llvm/test/CodeGen/X86/copy-eflags.ll | 108 |
3 files changed, 110 insertions, 0 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll
index b79fdef2ff1..11062115a7f 100644
--- a/llvm/test/CodeGen/X86/O0-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -37,6 +37,7 @@
 ; CHECK-NEXT: X86 PIC Global Base Reg Initialization
 ; CHECK-NEXT: Expand ISel Pseudo-instructions
 ; CHECK-NEXT: Local Stack Slot Allocation
+; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: X86 EFLAGS copy lowering
 ; CHECK-NEXT: X86 WinAlloca Expander
 ; CHECK-NEXT: Eliminate PHI nodes for register allocation
diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll
index 01deba7eed1..8511474d8f4 100644
--- a/llvm/test/CodeGen/X86/O3-pipeline.ll
+++ b/llvm/test/CodeGen/X86/O3-pipeline.ll
@@ -90,6 +90,7 @@
 ; CHECK-NEXT: X86 LEA Optimize
 ; CHECK-NEXT: X86 Optimize Call Frame
 ; CHECK-NEXT: X86 Avoid Store Forwarding Block
+; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: X86 EFLAGS copy lowering
 ; CHECK-NEXT: X86 WinAlloca Expander
 ; CHECK-NEXT: Detect Dead Lanes
diff --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll
index 07d13fe2e09..9844799ac1e 100644
--- a/llvm/test/CodeGen/X86/copy-eflags.ll
+++ b/llvm/test/CodeGen/X86/copy-eflags.ll
@@ -196,3 +196,111 @@ else:
   tail call void @external_b()
   ret void
 }
+
+; Test a function that gets special select lowering into CFG with copied EFLAGS
+; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
+; cross-block rewrites in at least some narrow cases.
+define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
+; X32-LABEL: PR37100:
+; X32:       # %bb.0: # %bb
+; X32-NEXT:    pushl %ebp
+; X32-NEXT:    .cfi_def_cfa_offset 8
+; X32-NEXT:    pushl %ebx
+; X32-NEXT:    .cfi_def_cfa_offset 12
+; X32-NEXT:    pushl %edi
+; X32-NEXT:    .cfi_def_cfa_offset 16
+; X32-NEXT:    pushl %esi
+; X32-NEXT:    .cfi_def_cfa_offset 20
+; X32-NEXT:    .cfi_offset %esi, -20
+; X32-NEXT:    .cfi_offset %edi, -16
+; X32-NEXT:    .cfi_offset %ebx, -12
+; X32-NEXT:    .cfi_offset %ebp, -8
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT:    jmp .LBB3_1
+; X32-NEXT:    .p2align 4, 0x90
+; X32-NEXT:  .LBB3_5: # %bb1
+; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT:    xorl %eax, %eax
+; X32-NEXT:    xorl %edx, %edx
+; X32-NEXT:    idivl %ebp
+; X32-NEXT:  .LBB3_1: # %bb1
+; X32-NEXT:    # =>This Inner Loop Header: Depth=1
+; X32-NEXT:    movsbl %cl, %eax
+; X32-NEXT:    movl %eax, %edx
+; X32-NEXT:    sarl $31, %edx
+; X32-NEXT:    cmpl %eax, %esi
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    sbbl %edx, %eax
+; X32-NEXT:    setl %al
+; X32-NEXT:    setl %dl
+; X32-NEXT:    movzbl %dl, %ebp
+; X32-NEXT:    negl %ebp
+; X32-NEXT:    testb $-1, %al
+; X32-NEXT:    jne .LBB3_3
+; X32-NEXT:  # %bb.2: # %bb1
+; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT:    movb %ch, %cl
+; X32-NEXT:  .LBB3_3: # %bb1
+; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT:    movb %cl, (%ebx)
+; X32-NEXT:    movl (%edi), %edx
+; X32-NEXT:    testb $-1, %al
+; X32-NEXT:    jne .LBB3_5
+; X32-NEXT:  # %bb.4: # %bb1
+; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X32-NEXT:    movl %edx, %ebp
+; X32-NEXT:    jmp .LBB3_5
+;
+; X64-LABEL: PR37100:
+; X64:       # %bb.0: # %bb
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    jmp .LBB3_1
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB3_5: # %bb1
+; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    idivl %esi
+; X64-NEXT:  .LBB3_1: # %bb1
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    movsbq %dil, %rax
+; X64-NEXT:    xorl %esi, %esi
+; X64-NEXT:    cmpq %rax, %r10
+; X64-NEXT:    setl %sil
+; X64-NEXT:    negl %esi
+; X64-NEXT:    cmpq %rax, %r10
+; X64-NEXT:    jl .LBB3_3
+; X64-NEXT:  # %bb.2: # %bb1
+; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT:    movl %ecx, %edi
+; X64-NEXT:  .LBB3_3: # %bb1
+; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT:    movb %dil, (%r8)
+; X64-NEXT:    jl .LBB3_5
+; X64-NEXT:  # %bb.4: # %bb1
+; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
+; X64-NEXT:    movl (%r9), %esi
+; X64-NEXT:    jmp .LBB3_5
+bb:
+  br label %bb1
+
+bb1:
+  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
+  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
+  %tmp3 = icmp sgt i16 %tmp2, 7
+  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
+  %tmp5 = sext i8 %tmp to i64
+  %tmp6 = icmp slt i64 %arg3, %tmp5
+  %tmp7 = sext i1 %tmp6 to i32
+  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
+  store volatile i8 %tmp8, i8* %ptr1
+  %tmp9 = load volatile i32, i32* %ptr2
+  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
+  %tmp11 = srem i32 0, %tmp10
+  %tmp12 = trunc i32 %tmp11 to i16
+  br label %bb1
+}
```

