author | David Bolvansky <david.bolvansky@gmail.com> | 2019-06-12 15:01:36 +0000
---|---|---
committer | David Bolvansky <david.bolvansky@gmail.com> | 2019-06-12 15:01:36 +0000
commit | 48365ec3e14a16db950cfac5cec126d65f3d774e |
tree | c0a1312163210ba116ca7171e78130db17c32032 |
parent | 61a7ab7fdb28ed906f061b4f22685156f94fe3a2 |
[NFC] Updated tests for D54411
llvm-svn: 363173
mode | file | lines changed
---|---|---
-rw-r--r-- | llvm/test/CodeGen/X86/conditional-tailcall.ll | 476
-rw-r--r-- | llvm/test/CodeGen/X86/tail-merge-after-mbp.mir | 68
-rw-r--r-- | llvm/test/CodeGen/X86/tail-opts.ll | 408
3 files changed, 812 insertions, 140 deletions
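Each regenerated test below carries a NOTE header recording that its assertions were autogenerated by utils/update_llc_test_checks.py (for the .ll tests) or utils/update_mir_test_checks.py (for the .mir test). The commands below are only a sketch of how such CHECK lines are typically refreshed, assuming an LLVM checkout with llc and the other tools already built; the exact invocation used for this commit is not recorded here.

# Assumed invocation (not part of the commit), run from the LLVM source root:
llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/X86/conditional-tailcall.ll llvm/test/CodeGen/X86/tail-opts.ll
llvm/utils/update_mir_test_checks.py llvm/test/CodeGen/X86/tail-merge-after-mbp.mir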
diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll index c00ce75b26d..c7a555b37d4 100644 --- a/llvm/test/CodeGen/X86/conditional-tailcall.ll +++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-linux -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32 ; RUN: llc < %s -mtriple=x86_64-linux -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64 ; RUN: llc < %s -mtriple=x86_64-win32 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=WIN64 @@ -6,6 +7,39 @@ declare void @foo() declare void @bar() define void @f(i32 %x, i32 %y) optsize { +; CHECK32-LABEL: f: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x08] +; CHECK32-NEXT: jne bar # TAILCALL +; CHECK32-NEXT: # encoding: [0x75,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.1: # %bb1 +; CHECK32-NEXT: jmp foo # TAILCALL +; CHECK32-NEXT: # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 +; +; CHECK64-LABEL: f: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7] +; CHECK64-NEXT: jne bar # TAILCALL +; CHECK64-NEXT: # encoding: [0x75,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.1: # %bb1 +; CHECK64-NEXT: jmp foo # TAILCALL +; CHECK64-NEXT: # encoding: [0xeb,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 +; +; WIN64-LABEL: f: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1] +; WIN64-NEXT: jne bar # TAILCALL +; WIN64-NEXT: # encoding: [0x75,A] +; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.1: # %bb1 +; WIN64-NEXT: jmp foo # TAILCALL +; WIN64-NEXT: # encoding: [0xeb,A] +; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 entry: %p = icmp eq i32 %x, %y br i1 %p, label %bb1, label %bb2 @@ -16,15 +50,83 @@ bb2: tail call void @bar() ret void -; CHECK-LABEL: f: -; CHECK: cmp -; CHECK: jne bar ; Check that the asm doesn't just look good, but uses the correct encoding. 
-; CHECK: encoding: [0x75,A] -; CHECK: jmp foo } define void @f_non_leaf(i32 %x, i32 %y) optsize { +; CHECK32-LABEL: f_non_leaf: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: pushl %ebx # encoding: [0x53] +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: .cfi_offset %ebx, -8 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; CHECK32-NEXT: #APP +; CHECK32-NEXT: #NO_APP +; CHECK32-NEXT: cmpl {{[0-9]+}}(%esp), %eax # encoding: [0x3b,0x44,0x24,0x0c] +; CHECK32-NEXT: jne .LBB1_2 # encoding: [0x75,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.1: # %bb1 +; CHECK32-NEXT: popl %ebx # encoding: [0x5b] +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: jmp foo # TAILCALL +; CHECK32-NEXT: # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB1_2: # %bb2 +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %ebx # encoding: [0x5b] +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: jmp bar # TAILCALL +; CHECK32-NEXT: # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; +; CHECK64-LABEL: f_non_leaf: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: pushq %rbx # encoding: [0x53] +; CHECK64-NEXT: .cfi_def_cfa_offset 16 +; CHECK64-NEXT: .cfi_offset %rbx, -16 +; CHECK64-NEXT: #APP +; CHECK64-NEXT: #NO_APP +; CHECK64-NEXT: cmpl %esi, %edi # encoding: [0x39,0xf7] +; CHECK64-NEXT: jne .LBB1_2 # encoding: [0x75,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.1: # %bb1 +; CHECK64-NEXT: popq %rbx # encoding: [0x5b] +; CHECK64-NEXT: .cfi_def_cfa_offset 8 +; CHECK64-NEXT: jmp foo # TAILCALL +; CHECK64-NEXT: # encoding: [0xeb,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB1_2: # %bb2 +; CHECK64-NEXT: .cfi_def_cfa_offset 16 +; CHECK64-NEXT: popq %rbx # encoding: [0x5b] +; CHECK64-NEXT: .cfi_def_cfa_offset 8 +; CHECK64-NEXT: jmp bar # TAILCALL +; CHECK64-NEXT: # encoding: [0xeb,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; +; WIN64-LABEL: f_non_leaf: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: pushq %rbx # encoding: [0x53] +; WIN64-NEXT: .seh_pushreg 3 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: #APP +; WIN64-NEXT: #NO_APP +; WIN64-NEXT: cmpl %edx, %ecx # encoding: [0x39,0xd1] +; WIN64-NEXT: jne .LBB1_2 # encoding: [0x75,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.1: # %bb1 +; WIN64-NEXT: popq %rbx # encoding: [0x5b] +; WIN64-NEXT: jmp foo # TAILCALL +; WIN64-NEXT: # encoding: [0xeb,A] +; WIN64-NEXT: # fixup A - offset: 1, value: foo-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB1_2: # %bb2 +; WIN64-NEXT: nop # encoding: [0x90] +; WIN64-NEXT: popq %rbx # encoding: [0x5b] +; WIN64-NEXT: jmp bar # TAILCALL +; WIN64-NEXT: # encoding: [0xeb,A] +; WIN64-NEXT: # fixup A - offset: 1, value: bar-1, kind: FK_PCRel_1 +; WIN64-NEXT: .seh_handlerdata +; WIN64-NEXT: .text +; WIN64-NEXT: .seh_endproc entry: ; Force %ebx to be spilled on the stack, turning this into ; not a "leaf" function for Win64. 
@@ -39,16 +141,67 @@ bb2: tail call void @bar() ret void -; CHECK-LABEL: f_non_leaf: -; WIN64-NOT: je foo -; WIN64-NOT: jne bar -; WIN64: jne -; WIN64: jmp foo -; WIN64: jmp bar } declare x86_thiscallcc zeroext i1 @baz(i8*, i32) define x86_thiscallcc zeroext i1 @BlockPlacementTest(i8* %this, i32 %x) optsize { +; CHECK32-LABEL: BlockPlacementTest: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04] +; CHECK32-NEXT: testb $42, %dl # encoding: [0xf6,0xc2,0x2a] +; CHECK32-NEXT: je .LBB2_1 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.2: # %land.rhs +; CHECK32-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK32-NEXT: testb $44, %dl # encoding: [0xf6,0xc2,0x2c] +; CHECK32-NEXT: je baz # TAILCALL +; CHECK32-NEXT: # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.3: # %land.end +; CHECK32-NEXT: # kill: def $al killed $al killed $eax +; CHECK32-NEXT: retl $4 # encoding: [0xc2,0x04,0x00] +; CHECK32-NEXT: .LBB2_1: +; CHECK32-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK32-NEXT: # kill: def $al killed $al killed $eax +; CHECK32-NEXT: retl $4 # encoding: [0xc2,0x04,0x00] +; +; CHECK64-LABEL: BlockPlacementTest: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: testb $42, %sil # encoding: [0x40,0xf6,0xc6,0x2a] +; CHECK64-NEXT: je .LBB2_1 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.2: # %land.rhs +; CHECK64-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; CHECK64-NEXT: testb $44, %sil # encoding: [0x40,0xf6,0xc6,0x2c] +; CHECK64-NEXT: je baz # TAILCALL +; CHECK64-NEXT: # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.3: # %land.end +; CHECK64-NEXT: # kill: def $al killed $al killed $eax +; CHECK64-NEXT: retq # encoding: [0xc3] +; CHECK64-NEXT: .LBB2_1: +; CHECK64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK64-NEXT: # kill: def $al killed $al killed $eax +; CHECK64-NEXT: retq # encoding: [0xc3] +; +; WIN64-LABEL: BlockPlacementTest: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: testb $42, %dl # encoding: [0xf6,0xc2,0x2a] +; WIN64-NEXT: je .LBB2_1 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.2: # %land.rhs +; WIN64-NEXT: movb $1, %al # encoding: [0xb0,0x01] +; WIN64-NEXT: testb $44, %dl # encoding: [0xf6,0xc2,0x2c] +; WIN64-NEXT: je baz # TAILCALL +; WIN64-NEXT: # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: baz-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.3: # %land.end +; WIN64-NEXT: # kill: def $al killed $al killed $eax +; WIN64-NEXT: retq # encoding: [0xc3] +; WIN64-NEXT: .LBB2_1: +; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; WIN64-NEXT: # kill: def $al killed $al killed $eax +; WIN64-NEXT: retq # encoding: [0xc3] entry: %and = and i32 %x, 42 %tobool = icmp eq i32 %and, 0 @@ -69,10 +222,6 @@ land.end: ; Make sure machine block placement isn't confused by the conditional tail call, ; but sees that it can fall through to the next block. 
-; CHECK-LABEL: BlockPlacementTest -; CHECK: je baz -; CHECK-NOT: xor -; CHECK: ret } @@ -82,7 +231,296 @@ land.end: declare zeroext i1 @_Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_(i8*, i8*) define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly dereferenceable(8) %s) minsize { -; CHECK-LABEL: pr31257 +; CHECK32-LABEL: pr31257: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: pushl %ebp # encoding: [0x55] +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: pushl %ebx # encoding: [0x53] +; CHECK32-NEXT: .cfi_def_cfa_offset 12 +; CHECK32-NEXT: pushl %edi # encoding: [0x57] +; CHECK32-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-NEXT: pushl %esi # encoding: [0x56] +; CHECK32-NEXT: .cfi_def_cfa_offset 20 +; CHECK32-NEXT: subl $12, %esp # encoding: [0x83,0xec,0x0c] +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: .cfi_offset %esi, -20 +; CHECK32-NEXT: .cfi_offset %edi, -16 +; CHECK32-NEXT: .cfi_offset %ebx, -12 +; CHECK32-NEXT: .cfi_offset %ebp, -8 +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x20] +; CHECK32-NEXT: movl (%eax), %eax # encoding: [0x8b,0x00] +; CHECK32-NEXT: movl -24(%eax), %edx # encoding: [0x8b,0x50,0xe8] +; CHECK32-NEXT: leal (%eax,%edx), %ebp # encoding: [0x8d,0x2c,0x10] +; CHECK32-NEXT: xorl %ebx, %ebx # encoding: [0x31,0xdb] +; CHECK32-NEXT: pushl $2 # encoding: [0x6a,0x02] +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: popl %esi # encoding: [0x5e] +; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 +; CHECK32-NEXT: xorl %edi, %edi # encoding: [0x31,0xff] +; CHECK32-NEXT: incl %edi # encoding: [0x47] +; CHECK32-NEXT: jmp .LBB3_1 # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB3_2: # %for.body +; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02] +; CHECK32-NEXT: je .LBB3_11 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.3: # %for.body +; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK32-NEXT: cmpl $1, %ebx # encoding: [0x83,0xfb,0x01] +; CHECK32-NEXT: je .LBB3_9 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.4: # %for.body +; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK32-NEXT: testl %ebx, %ebx # encoding: [0x85,0xdb] +; CHECK32-NEXT: jne .LBB3_10 # encoding: [0x75,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.5: # %sw.bb +; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK32-NEXT: movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08] +; CHECK32-NEXT: cmpl $43, %ecx # encoding: [0x83,0xf9,0x2b] +; CHECK32-NEXT: movl %edi, %ebx # encoding: [0x89,0xfb] +; CHECK32-NEXT: je .LBB3_10 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.6: # %sw.bb +; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK32-NEXT: cmpb $45, %cl # encoding: [0x80,0xf9,0x2d] +; CHECK32-NEXT: movl %edi, %ebx # encoding: [0x89,0xfb] +; CHECK32-NEXT: je .LBB3_10 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; CHECK32-NEXT: jmp .LBB3_7 # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB3_11: # %sw.bb22 +; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK32-NEXT: movzbl (%eax), %ecx # encoding: 
[0x0f,0xb6,0x08] +; CHECK32-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0] +; CHECK32-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a] +; CHECK32-NEXT: movl %esi, %ebx # encoding: [0x89,0xf3] +; CHECK32-NEXT: jb .LBB3_10 # encoding: [0x72,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; CHECK32-NEXT: jmp .LBB3_12 # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB3_9: # %sw.bb14 +; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK32-NEXT: movzbl (%eax), %ecx # encoding: [0x0f,0xb6,0x08] +; CHECK32-NEXT: .LBB3_7: # %if.else +; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK32-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0] +; CHECK32-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a] +; CHECK32-NEXT: movl %esi, %ebx # encoding: [0x89,0xf3] +; CHECK32-NEXT: jae .LBB3_8 # encoding: [0x73,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB3_10: # %for.inc +; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK32-NEXT: incl %eax # encoding: [0x40] +; CHECK32-NEXT: decl %edx # encoding: [0x4a] +; CHECK32-NEXT: .LBB3_1: # %for.cond +; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK32-NEXT: testl %edx, %edx # encoding: [0x85,0xd2] +; CHECK32-NEXT: jne .LBB3_2 # encoding: [0x75,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.13: +; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02] +; CHECK32-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; CHECK32-NEXT: jmp .LBB3_14 # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_14-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB3_8: +; CHECK32-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK32-NEXT: .LBB3_14: # %cleanup.thread +; CHECK32-NEXT: # kill: def $al killed $al killed $eax +; CHECK32-NEXT: addl $12, %esp # encoding: [0x83,0xc4,0x0c] +; CHECK32-NEXT: .cfi_def_cfa_offset 20 +; CHECK32-NEXT: .LBB3_15: # %cleanup.thread +; CHECK32-NEXT: popl %esi # encoding: [0x5e] +; CHECK32-NEXT: .cfi_def_cfa_offset 16 +; CHECK32-NEXT: popl %edi # encoding: [0x5f] +; CHECK32-NEXT: .cfi_def_cfa_offset 12 +; CHECK32-NEXT: popl %ebx # encoding: [0x5b] +; CHECK32-NEXT: .cfi_def_cfa_offset 8 +; CHECK32-NEXT: popl %ebp # encoding: [0x5d] +; CHECK32-NEXT: .cfi_def_cfa_offset 4 +; CHECK32-NEXT: retl # encoding: [0xc3] +; CHECK32-NEXT: .LBB3_12: # %if.else28 +; CHECK32-NEXT: .cfi_def_cfa_offset 32 +; CHECK32-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] +; CHECK32-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK32-NEXT: pushl %ebp # encoding: [0x55] +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: pushl %eax # encoding: [0x50] +; CHECK32-NEXT: .cfi_adjust_cfa_offset 4 +; CHECK32-NEXT: calll _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # encoding: [0xe8,A,A,A,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-4, kind: FK_PCRel_4 +; CHECK32-NEXT: addl $28, %esp # encoding: [0x83,0xc4,0x1c] +; CHECK32-NEXT: .cfi_adjust_cfa_offset -28 +; CHECK32-NEXT: jmp .LBB3_15 # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_15-1, kind: FK_PCRel_1 +; +; CHECK64-LABEL: pr31257: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: movq (%rdi), %rdi # encoding: [0x48,0x8b,0x3f] +; CHECK64-NEXT: movq -24(%rdi), %rax # encoding: [0x48,0x8b,0x47,0xe8] +; CHECK64-NEXT: leaq (%rdi,%rax), %rsi # encoding: [0x48,0x8d,0x34,0x07] +; 
CHECK64-NEXT: xorl %ecx, %ecx # encoding: [0x31,0xc9] +; CHECK64-NEXT: pushq $2 # encoding: [0x6a,0x02] +; CHECK64-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK64-NEXT: popq %r9 # encoding: [0x41,0x59] +; CHECK64-NEXT: .cfi_adjust_cfa_offset -8 +; CHECK64-NEXT: pushq $1 # encoding: [0x6a,0x01] +; CHECK64-NEXT: .cfi_adjust_cfa_offset 8 +; CHECK64-NEXT: popq %r8 # encoding: [0x41,0x58] +; CHECK64-NEXT: .cfi_adjust_cfa_offset -8 +; CHECK64-NEXT: jmp .LBB3_1 # encoding: [0xeb,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB3_2: # %for.body +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02] +; CHECK64-NEXT: je .LBB3_11 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.3: # %for.body +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: cmpl $1, %ecx # encoding: [0x83,0xf9,0x01] +; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.4: # %for.body +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9] +; CHECK64-NEXT: jne .LBB3_12 # encoding: [0x75,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.5: # %sw.bb +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: movzbl (%rdi), %edx # encoding: [0x0f,0xb6,0x17] +; CHECK64-NEXT: cmpl $43, %edx # encoding: [0x83,0xfa,0x2b] +; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1] +; CHECK64-NEXT: je .LBB3_12 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.6: # %sw.bb +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: cmpb $45, %dl # encoding: [0x80,0xfa,0x2d] +; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1] +; CHECK64-NEXT: je .LBB3_12 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.7: # %if.else +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: addl $-48, %edx # encoding: [0x83,0xc2,0xd0] +; CHECK64-NEXT: cmpl $10, %edx # encoding: [0x83,0xfa,0x0a] +; CHECK64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB3_10: # %sw.bb14 +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f] +; CHECK64-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0] +; CHECK64-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a] +; CHECK64-NEXT: .LBB3_8: # %if.else +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: movl %r9d, %ecx # encoding: [0x44,0x89,0xc9] +; CHECK64-NEXT: jb .LBB3_12 # encoding: [0x72,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1 +; CHECK64-NEXT: jmp .LBB3_9 # encoding: [0xeb,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB3_11: # %sw.bb22 +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f] +; CHECK64-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0] +; CHECK64-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a] +; CHECK64-NEXT: movl %r9d, %ecx # encoding: [0x44,0x89,0xc9] +; CHECK64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL +; 
CHECK64-NEXT: # encoding: [0x73,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB3_12: # %for.inc +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: incq %rdi # encoding: [0x48,0xff,0xc7] +; CHECK64-NEXT: decq %rax # encoding: [0x48,0xff,0xc8] +; CHECK64-NEXT: .LBB3_1: # %for.cond +; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK64-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0] +; CHECK64-NEXT: jne .LBB3_2 # encoding: [0x75,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.13: +; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02] +; CHECK64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; CHECK64-NEXT: # kill: def $al killed $al killed $eax +; CHECK64-NEXT: retq # encoding: [0xc3] +; CHECK64-NEXT: .LBB3_9: +; CHECK64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; CHECK64-NEXT: # kill: def $al killed $al killed $eax +; CHECK64-NEXT: retq # encoding: [0xc3] +; +; WIN64-LABEL: pr31257: +; WIN64: # %bb.0: # %entry +; WIN64-NEXT: movq (%rcx), %rcx # encoding: [0x48,0x8b,0x09] +; WIN64-NEXT: movq -24(%rcx), %r8 # encoding: [0x4c,0x8b,0x41,0xe8] +; WIN64-NEXT: leaq (%rcx,%r8), %rdx # encoding: [0x4a,0x8d,0x14,0x01] +; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; WIN64-NEXT: jmp .LBB3_1 # encoding: [0xeb,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB3_2: # %for.body +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02] +; WIN64-NEXT: je .LBB3_10 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.3: # %for.body +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: cmpl $1, %eax # encoding: [0x83,0xf8,0x01] +; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.4: # %for.body +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: testl %eax, %eax # encoding: [0x85,0xc0] +; WIN64-NEXT: jne .LBB3_11 # encoding: [0x75,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.5: # %sw.bb +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09] +; WIN64-NEXT: cmpl $43, %r9d # encoding: [0x41,0x83,0xf9,0x2b] +; WIN64-NEXT: movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00] +; WIN64-NEXT: je .LBB3_11 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.6: # %sw.bb +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: cmpb $45, %r9b # encoding: [0x41,0x80,0xf9,0x2d] +; WIN64-NEXT: je .LBB3_11 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; WIN64-NEXT: jmp .LBB3_7 # encoding: [0xeb,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB3_9: # %sw.bb14 +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09] +; WIN64-NEXT: .LBB3_7: # %if.else +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0] +; WIN64-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00] +; WIN64-NEXT: cmpl $10, %r9d # encoding: [0x41,0x83,0xf9,0x0a] +; WIN64-NEXT: jb .LBB3_11 # encoding: [0x72,A] 
+; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; WIN64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB3_10: # %sw.bb22 +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09] +; WIN64-NEXT: addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0] +; WIN64-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00] +; WIN64-NEXT: cmpl $10, %r9d # encoding: [0x41,0x83,0xf9,0x0a] +; WIN64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL +; WIN64-NEXT: # encoding: [0x73,A] +; WIN64-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB3_11: # %for.inc +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: incq %rcx # encoding: [0x48,0xff,0xc1] +; WIN64-NEXT: decq %r8 # encoding: [0x49,0xff,0xc8] +; WIN64-NEXT: .LBB3_1: # %for.cond +; WIN64-NEXT: # =>This Inner Loop Header: Depth=1 +; WIN64-NEXT: testq %r8, %r8 # encoding: [0x4d,0x85,0xc0] +; WIN64-NEXT: jne .LBB3_2 # encoding: [0x75,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.12: +; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02] +; WIN64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] +; WIN64-NEXT: # kill: def $al killed $al killed $eax +; WIN64-NEXT: retq # encoding: [0xc3] +; WIN64-NEXT: .LBB3_8: +; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; WIN64-NEXT: # kill: def $al killed $al killed $eax +; WIN64-NEXT: retq # encoding: [0xc3] entry: %_M_p.i.i = getelementptr inbounds %"class.std::basic_string", %"class.std::basic_string"* %s, i64 0, i32 0, i32 0 %0 = load i8*, i8** %_M_p.i.i, align 8 @@ -134,12 +572,6 @@ sw.bb22: ; preds = %for.body ; Make sure Machine Copy Propagation doesn't delete the mov to %ecx becaue it ; thinks the conditional tail call clobbers it. 
-; CHECK64-LABEL: .LBB3_11: -; CHECK64: movzbl (%rdi), %ecx -; CHECK64-NEXT: addl $-48, %ecx -; CHECK64-NEXT: cmpl $10, %ecx -; CHECK64-NEXT: movl %r9d, %ecx -; CHECK64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEE if.else28: ; preds = %sw.bb22 %call34 = tail call zeroext i1 @_Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_(i8* nonnull %it.sroa.0.0, i8* %add.ptr.i56) diff --git a/llvm/test/CodeGen/X86/tail-merge-after-mbp.mir b/llvm/test/CodeGen/X86/tail-merge-after-mbp.mir index 67ad823f357..5965004ebca 100644 --- a/llvm/test/CodeGen/X86/tail-merge-after-mbp.mir +++ b/llvm/test/CodeGen/X86/tail-merge-after-mbp.mir @@ -1,34 +1,56 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=x86_64-linux -run-pass=block-placement -o - %s | FileCheck %s --- # check loop bb.7 is not merged with bb.10, bb.13 # check loop bb.9 is not merged with bb.12 -# CHECK: bb.2: -# CHECK-NEXT: successors: %bb.3(0x30000000), %bb.4(0x50000000) -# CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg -# CHECK-NEXT: TEST64rr $rax, $rax -# CHECK-NEXT: JCC_1 %bb.3, 4 -# CHECK: bb.4: -# CHECK-NEXT: successors: %bb.5(0x30000000), %bb.10(0x50000000) -# CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0 -# CHECK-NEXT: JCC_1 %bb.10, 5 -# CHECK: bb.5: -# CHECK-NEXT: successors: %bb.6(0x30000000), %bb.7(0x50000000) -# CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg -# CHECK-NEXT: TEST64rr $rax, $rax -# CHECK-NEXT: JCC_1 %bb.6, 4 -# CHECK: bb.7 -# CHECK-NEXT: successors: %bb.8(0x71555555), %bb.10(0x0eaaaaab) -# CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0 -# CHECK-NEXT: JCC_1 %bb.10, 5 -# CHECK: bb.8: -# CHECK-NEXT: successors: %bb.9(0x04000000), %bb.7(0x7c000000) -# CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg -# CHECK-NEXT: TEST64rr $rax, $rax -# CHECK-NEXT: JCC_1 %bb.7, 5 name: foo body: | + ; CHECK-LABEL: name: foo + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK: TEST8ri $dl, 1, implicit-def $eflags, implicit killed $edx + ; CHECK: JCC_1 %bb.2, 4, implicit $eflags + ; CHECK: bb.1: + ; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags + ; CHECK: RETQ $eax + ; CHECK: bb.2: + ; CHECK: successors: %bb.3(0x30000000), %bb.4(0x50000000) + ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8) + ; CHECK: TEST64rr $rax, $rax, implicit-def $eflags + ; CHECK: JCC_1 %bb.3, 4, implicit $eflags + ; CHECK: bb.4: + ; CHECK: successors: %bb.5(0x30000000), %bb.10(0x50000000) + ; CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load 8) + ; CHECK: JCC_1 %bb.10, 5, implicit $eflags + ; CHECK: bb.5: + ; CHECK: successors: %bb.6(0x30000000), %bb.7(0x50000000) + ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8) + ; CHECK: TEST64rr $rax, $rax, implicit-def $eflags + ; CHECK: JCC_1 %bb.6, 4, implicit $eflags + ; CHECK: bb.7 (align 4): + ; CHECK: successors: %bb.8(0x71555555), %bb.10(0x0eaaaaab) + ; CHECK: CMP64mi8 killed $rax, 1, $noreg, 8, $noreg, 0, implicit-def $eflags :: (load 8), (load 8) + ; CHECK: JCC_1 %bb.10, 5, implicit $eflags + ; CHECK: bb.8: + ; CHECK: successors: %bb.9(0x04000000), %bb.7(0x7c000000) + ; CHECK: $rax = MOV64rm $r14, 1, $noreg, 0, $noreg :: (load 8) + ; CHECK: TEST64rr $rax, $rax, implicit-def $eflags + ; CHECK: JCC_1 %bb.7, 5, implicit $eflags + ; CHECK: bb.9: + ; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags + ; CHECK: RETQ $eax + ; CHECK: bb.10: + ; CHECK: $ebp = XOR32rr undef $ebp, 
undef $ebp, implicit-def dead $eflags + ; CHECK: dead $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, implicit-def $al + ; CHECK: RETQ $eax + ; CHECK: bb.3: + ; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags + ; CHECK: RETQ $eax + ; CHECK: bb.6: + ; CHECK: $ebp = XOR32rr undef $ebp, undef $ebp, implicit-def dead $eflags + ; CHECK: RETQ $eax bb.0: successors: %bb.1(0x40000000), %bb.7(0x40000000) diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll index b1e8f9c0e9c..bf551cba22e 100644 --- a/llvm/test/CodeGen/X86/tail-opts.ll +++ b/llvm/test/CodeGen/X86/tail-opts.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false -post-RA-scheduler=true | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -post-RA-scheduler=true | FileCheck %s declare void @bar(i32) declare void @car(i32) @@ -13,13 +14,44 @@ declare i1 @qux() ; BranchFolding should tail-merge the stores since they all precede ; direct branches to the same place. -; CHECK-LABEL: tail_merge_me: -; CHECK-NOT: GHJK -; CHECK: movl $0, GHJK(%rip) -; CHECK-NEXT: movl $1, HABC(%rip) -; CHECK-NOT: GHJK - define void @tail_merge_me() nounwind { +; CHECK-LABEL: tail_merge_me: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.6: # %A +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: callq bar +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_1: # %next +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # %bb.2: # %B +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: callq car +; CHECK-NEXT: jmp .LBB0_4 +; CHECK-NEXT: .LBB0_3: # %C +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: callq dar +; CHECK-NEXT: .LBB0_4: # %M +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: movl $1, {{.*}}(%rip) +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB0_5 +; CHECK-NEXT: # %bb.7: # %return +; CHECK-NEXT: movl $1000, %edi # imm = 0x3E8 +; CHECK-NEXT: callq ear +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_5: # %altret +; CHECK-NEXT: movl $1001, %edi # imm = 0x3E9 +; CHECK-NEXT: callq far +; CHECK-NEXT: popq %rax +; CHECK-NEXT: retq entry: %a = call i1 @qux() br i1 %a, label %A, label %next @@ -60,15 +92,53 @@ declare i8* @choose(i8*, i8*) ; BranchFolding should tail-duplicate the indirect jump to avoid ; redundant branching. 
-; CHECK-LABEL: tail_duplicate_me: -; CHECK: movl $0, GHJK(%rip) -; CHECK-NEXT: jmpq *%r -; CHECK: movl $0, GHJK(%rip) -; CHECK-NEXT: jmpq *%r -; CHECK: movl $0, GHJK(%rip) -; CHECK-NEXT: jmpq *%r - define void @tail_duplicate_me() nounwind { +; CHECK-LABEL: tail_duplicate_me: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq qux +; CHECK-NEXT: movl $.Ltmp0, %edi +; CHECK-NEXT: movl $.Ltmp1, %esi +; CHECK-NEXT: movl %eax, %ebx +; CHECK-NEXT: callq choose +; CHECK-NEXT: movq %rax, %r14 +; CHECK-NEXT: testb $1, %bl +; CHECK-NEXT: je .LBB1_1 +; CHECK-NEXT: # %bb.7: # %A +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: callq bar +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: jmpq *%r14 +; CHECK-NEXT: .Ltmp0: # Block address taken +; CHECK-NEXT: .LBB1_4: # %return +; CHECK-NEXT: movl $1000, %edi # imm = 0x3E8 +; CHECK-NEXT: callq ear +; CHECK-NEXT: jmp .LBB1_5 +; CHECK-NEXT: .LBB1_1: # %next +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB1_3 +; CHECK-NEXT: # %bb.2: # %B +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: callq car +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: jmpq *%r14 +; CHECK-NEXT: .Ltmp1: # Block address taken +; CHECK-NEXT: .LBB1_6: # %altret +; CHECK-NEXT: movl $1001, %edi # imm = 0x3E9 +; CHECK-NEXT: callq far +; CHECK-NEXT: .LBB1_5: # %return +; CHECK-NEXT: addq $8, %rsp +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB1_3: # %C +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: callq dar +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: jmpq *%r14 entry: %a = call i1 @qux() %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return), @@ -107,23 +177,26 @@ altret: ; BranchFolding shouldn't try to merge the tails of two blocks ; with only a branch in common, regardless of the fallthrough situation. -; CHECK-LABEL: dont_merge_oddly: -; CHECK-NOT: ret -; CHECK: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} -; CHECK-NEXT: jbe .LBB2_3 -; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} -; CHECK-NEXT: ja .LBB2_4 -; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB2_3: -; CHECK-NEXT: ucomiss %xmm{{[0-2]}}, %xmm{{[0-2]}} -; CHECK-NEXT: jbe .LBB2_2 -; CHECK-NEXT: .LBB2_4: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: ret - define i1 @dont_merge_oddly(float* %result) nounwind { +; CHECK-LABEL: dont_merge_oddly: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: ucomiss %xmm1, %xmm2 +; CHECK-NEXT: jbe .LBB2_3 +; CHECK-NEXT: # %bb.1: # %bb +; CHECK-NEXT: ucomiss %xmm0, %xmm1 +; CHECK-NEXT: ja .LBB2_4 +; CHECK-NEXT: .LBB2_2: # %bb30 +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB2_3: # %bb21 +; CHECK-NEXT: ucomiss %xmm0, %xmm2 +; CHECK-NEXT: jbe .LBB2_2 +; CHECK-NEXT: .LBB2_4: # %bb26 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: retq entry: %tmp4 = getelementptr float, float* %result, i32 2 %tmp5 = load float, float* %tmp4, align 4 @@ -151,22 +224,9 @@ bb30: ; Do any-size tail-merging when two candidate blocks will both require ; an unconditional jump to complete a two-way conditional branch. - -; CHECK-LABEL: c_expand_expr_stmt: ; ; This test only works when register allocation happens to use %rax for both ; load addresses. 
-; -; CHE: jmp .LBB3_11 -; CHE-NEXT: .LBB3_9: -; CHE-NEXT: movq 8(%rax), %rax -; CHE-NEXT: xorl %edx, %edx -; CHE-NEXT: movb 16(%rax), %al -; CHE-NEXT: cmpb $16, %al -; CHE-NEXT: je .LBB3_11 -; CHE-NEXT: cmpb $23, %al -; CHE-NEXT: jne .LBB3_14 -; CHE-NEXT: .LBB3_11: %0 = type { %struct.rtx_def* } %struct.lang_decl = type opaque @@ -177,6 +237,80 @@ bb30: %union.tree_node = type { %struct.tree_decl } define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind { +; CHECK-LABEL: c_expand_expr_stmt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB3_17 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movb 0, %bl +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB3_16 +; CHECK-NEXT: # %bb.2: # %bb.i +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB3_16 +; CHECK-NEXT: # %bb.3: # %lvalue_p.exit +; CHECK-NEXT: movq 0, %rax +; CHECK-NEXT: movzbl (%rax), %ecx +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: je .LBB3_12 +; CHECK-NEXT: # %bb.4: # %lvalue_p.exit +; CHECK-NEXT: cmpl $2, %ecx +; CHECK-NEXT: jne .LBB3_5 +; CHECK-NEXT: # %bb.6: # %bb.i1 +; CHECK-NEXT: movq 32(%rax), %rax +; CHECK-NEXT: movzbl 16(%rax), %ecx +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: je .LBB3_10 +; CHECK-NEXT: # %bb.7: # %bb.i1 +; CHECK-NEXT: cmpl $2, %ecx +; CHECK-NEXT: jne .LBB3_8 +; CHECK-NEXT: # %bb.9: # %bb.i.i +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: callq lvalue_p +; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: setne %al +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB3_15 +; CHECK-NEXT: jmp .LBB3_17 +; CHECK-NEXT: .LBB3_16: # %bb1 +; CHECK-NEXT: cmpb $23, %bl +; CHECK-NEXT: .LBB3_17: # %bb3 +; CHECK-NEXT: .LBB3_12: # %bb2.i3 +; CHECK-NEXT: movq 8(%rax), %rax +; CHECK-NEXT: movb 16(%rax), %cl +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpb $23, %cl +; CHECK-NEXT: je .LBB3_14 +; CHECK-NEXT: # %bb.13: # %bb2.i3 +; CHECK-NEXT: cmpb $16, %cl +; CHECK-NEXT: je .LBB3_14 +; CHECK-NEXT: jmp .LBB3_17 +; CHECK-NEXT: .LBB3_5: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB3_15 +; CHECK-NEXT: jmp .LBB3_17 +; CHECK-NEXT: .LBB3_10: # %bb2.i.i2 +; CHECK-NEXT: movq 8(%rax), %rax +; CHECK-NEXT: movb 16(%rax), %cl +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpb $16, %cl +; CHECK-NEXT: je .LBB3_14 +; CHECK-NEXT: # %bb.11: # %bb2.i.i2 +; CHECK-NEXT: cmpb $23, %cl +; CHECK-NEXT: je .LBB3_14 +; CHECK-NEXT: jmp .LBB3_17 +; CHECK-NEXT: .LBB3_8: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: .LBB3_14: # %lvalue_p.exit4 +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB3_17 +; CHECK-NEXT: .LBB3_15: # %lvalue_p.exit4 +; CHECK-NEXT: testb %bl, %bl entry: %tmp4 = load i8, i8* null, align 8 ; <i8> [#uses=3] switch i8 %tmp4, label %bb3 [ @@ -274,13 +408,17 @@ declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind ; instructions are involved. This function should have only ; one ret instruction. 
-; CHECK-LABEL: foo: -; CHECK: callq func -; CHECK-NEXT: popq -; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: ret - define void @foo(i1* %V) nounwind { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: je .LBB4_2 +; CHECK-NEXT: # %bb.1: # %bb +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq func +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .LBB4_2: # %return +; CHECK-NEXT: retq entry: %t0 = icmp eq i1* %V, null br i1 %t0, label %return, label %bb @@ -297,15 +435,25 @@ declare void @func() ; one - One instruction may be tail-duplicated even with optsize. -; CHECK-LABEL: one: -; CHECK: j{{.*}} tail_call_me -; CHECK: j{{.*}} tail_call_me - @XYZ = external global i32 declare void @tail_call_me() define void @one(i32 %v) nounwind optsize { +; CHECK-LABEL: one: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je .LBB5_3 +; CHECK-NEXT: # %bb.1: # %bby +; CHECK-NEXT: cmpl $16, %edi +; CHECK-NEXT: je .LBB5_4 +; CHECK-NEXT: # %bb.2: # %bb7 +; CHECK-NEXT: jmp tail_call_me # TAILCALL +; CHECK-NEXT: .LBB5_3: # %bbx +; CHECK-NEXT: cmpl $128, %edi +; CHECK-NEXT: jne tail_call_me # TAILCALL +; CHECK-NEXT: .LBB5_4: # %return +; CHECK-NEXT: retq entry: %0 = icmp eq i32 %v, 0 br i1 %0, label %bbx, label %bby @@ -336,14 +484,19 @@ return: ; tail instead of one. This is too much to be merged, given ; the optsize attribute. -; CHECK-LABEL: two: -; CHECK-NOT: XYZ -; CHECK: ret -; CHECK: movl $0, XYZ(%rip) -; CHECK: movl $1, XYZ(%rip) -; CHECK-NOT: XYZ - define void @two() nounwind optsize { +; CHECK-LABEL: two: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB6_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB6_1: # %bb7 +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: movl $1, {{.*}}(%rip) entry: %0 = icmp eq i32 undef, 0 br i1 %0, label %bbx, label %bby @@ -374,14 +527,19 @@ return: ; two_minsize - Same as two, but with minsize instead of optsize. -; CHECK-LABEL: two_minsize: -; CHECK-NOT: XYZ -; CHECK: ret -; CHECK: movl $0, XYZ(%rip) -; CHECK: movl $1, XYZ(%rip) -; CHECK-NOT: XYZ - define void @two_minsize() nounwind minsize { +; CHECK-LABEL: two_minsize: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB7_1 +; CHECK-NEXT: # %bb.2: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB7_1: # %bb7 +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: movl $1, {{.*}}(%rip) entry: %0 = icmp eq i32 undef, 0 br i1 %0, label %bbx, label %bby @@ -413,13 +571,28 @@ return: ; two_nosize - Same as two, but without the optsize attribute. ; Now two instructions are enough to be tail-duplicated. 
-; CHECK-LABEL: two_nosize: -; CHECK: movl $0, XYZ(%rip) -; CHECK: jmp tail_call_me -; CHECK: movl $0, XYZ(%rip) -; CHECK: jmp tail_call_me - define void @two_nosize() nounwind { +; CHECK-LABEL: two_nosize: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB8_3 +; CHECK-NEXT: # %bb.1: # %bby +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB8_4 +; CHECK-NEXT: # %bb.2: # %bb7 +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: jmp tail_call_me # TAILCALL +; CHECK-NEXT: .LBB8_3: # %bbx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: je .LBB8_5 +; CHECK-NEXT: .LBB8_4: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB8_5: # %bb12 +; CHECK-NEXT: movl $0, {{.*}}(%rip) +; CHECK-NEXT: jmp tail_call_me # TAILCALL entry: %0 = icmp eq i32 undef, 0 br i1 %0, label %bbx, label %bby @@ -451,12 +624,19 @@ return: ; Tail-merging should merge the two ret instructions since one side ; can fall-through into the ret and the other side has to branch anyway. -; CHECK-LABEL: TESTE: -; CHECK: ret -; CHECK-NOT: ret -; CHECK: size TESTE - define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone { +; CHECK-LABEL: TESTE: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: cmovgq %rdi, %rax +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: jle .LBB9_2 +; CHECK-NEXT: # %bb.1: # %bb.nph +; CHECK-NEXT: imulq %rdi, %rsi +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: .LBB9_2: # %for.end +; CHECK-NEXT: retq entry: %cmp = icmp slt i64 %parami, 1 ; <i1> [#uses=1] %varx.0 = select i1 %cmp, i64 1, i64 %parami ; <i64> [#uses=1] @@ -476,15 +656,34 @@ for.end: ; preds = %entry ; out-of-line after the main return, so we should try to eliminate as many of ; them as possible. -; CHECK-LABEL: merge_aborts: -; CHECK-NOT: callq abort -; CHECK: ret -; CHECK: callq abort -; CHECK-NOT: callq abort -; CHECK: .Lfunc_end{{.*}}: - declare void @abort() define void @merge_aborts() { +; CHECK-LABEL: merge_aborts: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB10_5 +; CHECK-NEXT: # %bb.1: # %cont1 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB10_5 +; CHECK-NEXT: # %bb.2: # %cont2 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB10_5 +; CHECK-NEXT: # %bb.3: # %cont3 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB10_5 +; CHECK-NEXT: # %bb.4: # %cont4 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB10_5: # %abort1 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq abort entry: %c1 = call i1 @qux() br i1 %c1, label %cont1, label %abort1 @@ -516,18 +715,37 @@ cont4: ; Use alternating abort functions so that the blocks we wish to merge are not ; layout successors during branch folding. 
-; CHECK-LABEL: merge_alternating_aborts: -; CHECK-NOT: callq abort -; CHECK: ret -; CHECK: callq abort -; CHECK: callq alt_abort -; CHECK-NOT: callq abort -; CHECK-NOT: callq alt_abort -; CHECK: .Lfunc_end{{.*}}: - declare void @alt_abort() define void @merge_alternating_aborts() { +; CHECK-LABEL: merge_alternating_aborts: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB11_5 +; CHECK-NEXT: # %bb.1: # %cont1 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB11_6 +; CHECK-NEXT: # %bb.2: # %cont2 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB11_5 +; CHECK-NEXT: # %bb.3: # %cont3 +; CHECK-NEXT: callq qux +; CHECK-NEXT: testb $1, %al +; CHECK-NEXT: je .LBB11_6 +; CHECK-NEXT: # %bb.4: # %cont4 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB11_5: # %abort1 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq abort +; CHECK-NEXT: .LBB11_6: # %abort2 +; CHECK-NEXT: callq alt_abort entry: %c1 = call i1 @qux() br i1 %c1, label %cont1, label %abort1 |