diff options
Diffstat (limited to 'llvm/test/CodeGen/X86')
24 files changed, 558 insertions, 423 deletions
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll index d2d18ec221a..acc4b7e1381 100644 --- a/llvm/test/CodeGen/X86/block-placement.ll +++ b/llvm/test/CodeGen/X86/block-placement.ll @@ -82,14 +82,14 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) { ; Check that we sink cold loop blocks after the hot loop body. ; CHECK-LABEL: test_loop_cold_blocks: ; CHECK: %entry -; CHECK-NOT: .p2align -; CHECK: %unlikely1 -; CHECK-NOT: .p2align -; CHECK: %unlikely2 ; CHECK: .p2align ; CHECK: %body1 ; CHECK: %body2 ; CHECK: %body3 +; CHECK-NOT: .p2align +; CHECK: %unlikely1 +; CHECK-NOT: .p2align +; CHECK: %unlikely2 ; CHECK: %exit entry: @@ -125,7 +125,7 @@ exit: ret i32 %sum } -!0 = !{!"branch_weights", i32 4, i32 64} +!0 = !{!"branch_weights", i32 1, i32 64} define i32 @test_loop_early_exits(i32 %i, i32* %a) { ; Check that we sink early exit blocks out of loop bodies. @@ -189,8 +189,8 @@ define i32 @test_loop_rotate(i32 %i, i32* %a) { ; loop, eliminating unconditional branches to the top. ; CHECK-LABEL: test_loop_rotate: ; CHECK: %entry -; CHECK: %body1 ; CHECK: %body0 +; CHECK: %body1 ; CHECK: %exit entry: @@ -957,16 +957,15 @@ define void @benchmark_heapsort(i32 %n, double* nocapture %ra) { ; CHECK: %if.else ; CHECK: %if.end10 ; Second rotated loop top -; CHECK: .p2align -; CHECK: %if.then24 ; CHECK: %while.cond.outer ; Third rotated loop top ; CHECK: .p2align +; CHECK: %if.end20 ; CHECK: %while.cond ; CHECK: %while.body ; CHECK: %land.lhs.true ; CHECK: %if.then19 -; CHECK: %if.end20 +; CHECK: %if.then24 ; CHECK: %if.then8 ; CHECK: ret @@ -1546,8 +1545,8 @@ define i32 @not_rotate_if_extra_branch_regression(i32 %count, i32 %init) { ; CHECK-LABEL: not_rotate_if_extra_branch_regression ; CHECK: %.entry ; CHECK: %.first_backedge -; CHECK: %.slow ; CHECK: %.second_header +; CHECK: %.slow .entry: %sum.0 = shl nsw i32 %count, 1 br label %.first_header diff --git a/llvm/test/CodeGen/X86/code_placement.ll b/llvm/test/CodeGen/X86/code_placement.ll index 7b5f4c34690..270612883a9 100644 --- a/llvm/test/CodeGen/X86/code_placement.ll +++ b/llvm/test/CodeGen/X86/code_placement.ll @@ -4,6 +4,11 @@ @Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4] @Te3 = external global [256 x i32] ; <[256 x i32]*> [#uses=2] +; CHECK: %entry +; CHECK: %bb +; CHECK: %bb1 +; CHECK: %bb2 + define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp { entry: %0 = load i32, i32* %rk, align 4 ; <i32> [#uses=1] @@ -12,8 +17,6 @@ entry: %tmp15 = add i32 %r, -1 ; <i32> [#uses=1] %tmp.16 = zext i32 %tmp15 to i64 ; <i64> [#uses=2] br label %bb -; CHECK: jmp -; CHECK-NEXT: align bb: ; preds = %bb1, %entry %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb1 ] ; <i64> [#uses=3] diff --git a/llvm/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll b/llvm/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll index b30aaea9024..48329ed86e1 100644 --- a/llvm/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll +++ b/llvm/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll @@ -1,13 +1,12 @@ ; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s define void @foo() { -; Test that when determining the edge probability from a node in an inner loop -; to a node in an outer loop, the weights on edges in the inner loop should be -; ignored if we are building the chain for the outer loop. +; After moving the latch to the top of loop, there is no fall through from the +; latch to outer loop. ; ; CHECK-LABEL: foo: -; CHECK: callq c ; CHECK: callq b +; CHECK: callq c entry: %call = call zeroext i1 @a() diff --git a/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll b/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll index 925f880b4fc..cdf2fb05a73 100644 --- a/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll +++ b/llvm/test/CodeGen/X86/code_placement_loop_rotation2.ll @@ -5,13 +5,13 @@ define void @foo() { ; Test a nested loop case when profile data is not available. ; ; CHECK-LABEL: foo: +; CHECK: callq g +; CHECK: callq h ; CHECK: callq b -; CHECK: callq c -; CHECK: callq d ; CHECK: callq e ; CHECK: callq f -; CHECK: callq g -; CHECK: callq h +; CHECK: callq c +; CHECK: callq d entry: br label %header diff --git a/llvm/test/CodeGen/X86/code_placement_no_header_change.ll b/llvm/test/CodeGen/X86/code_placement_no_header_change.ll index 0275606568c..ab173b8de71 100644 --- a/llvm/test/CodeGen/X86/code_placement_no_header_change.ll +++ b/llvm/test/CodeGen/X86/code_placement_no_header_change.ll @@ -7,9 +7,9 @@ define i32 @bar(i32 %count) { ; Later backedge1 and backedge2 is rotated before loop header. ; CHECK-LABEL: bar ; CHECK: %.entry +; CHECK: %.header ; CHECK: %.backedge1 ; CHECK: %.backedge2 -; CHECK: %.header ; CHECK: %.exit .entry: %c = shl nsw i32 %count, 2 diff --git a/llvm/test/CodeGen/X86/conditional-tailcall.ll b/llvm/test/CodeGen/X86/conditional-tailcall.ll index e734773b7a4..e1a1e7b777d 100644 --- a/llvm/test/CodeGen/X86/conditional-tailcall.ll +++ b/llvm/test/CodeGen/X86/conditional-tailcall.ll @@ -258,9 +258,12 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe ; CHECK32-NEXT: .cfi_adjust_cfa_offset -4 ; CHECK32-NEXT: xorl %edi, %edi # encoding: [0x31,0xff] ; CHECK32-NEXT: incl %edi # encoding: [0x47] -; CHECK32-NEXT: jmp .LBB3_1 # encoding: [0xeb,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 -; CHECK32-NEXT: .LBB3_2: # %for.body +; CHECK32-NEXT: .LBB3_1: # %for.cond +; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK32-NEXT: testl %edx, %edx # encoding: [0x85,0xd2] +; CHECK32-NEXT: je .LBB3_13 # encoding: [0x74,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1 +; CHECK32-NEXT: # %bb.2: # %for.body ; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02] ; CHECK32-NEXT: je .LBB3_11 # encoding: [0x74,A] @@ -314,12 +317,9 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe ; CHECK32-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK32-NEXT: incl %eax # encoding: [0x40] ; CHECK32-NEXT: decl %edx # encoding: [0x4a] -; CHECK32-NEXT: .LBB3_1: # %for.cond -; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK32-NEXT: testl %edx, %edx # encoding: [0x85,0xd2] -; CHECK32-NEXT: jne .LBB3_2 # encoding: [0x75,A] -; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1 -; CHECK32-NEXT: # %bb.13: +; CHECK32-NEXT: jmp .LBB3_1 # encoding: [0xeb,A] +; CHECK32-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 +; CHECK32-NEXT: .LBB3_13: ; CHECK32-NEXT: cmpl $2, %ebx # encoding: [0x83,0xfb,0x02] ; CHECK32-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] ; CHECK32-NEXT: jmp .LBB3_14 # encoding: [0xeb,A] @@ -369,56 +369,59 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe ; CHECK64-NEXT: .cfi_adjust_cfa_offset 8 ; CHECK64-NEXT: popq %r8 # encoding: [0x41,0x58] ; CHECK64-NEXT: .cfi_adjust_cfa_offset -8 -; CHECK64-NEXT: jmp .LBB3_11 # encoding: [0xeb,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 -; CHECK64-NEXT: .LBB3_1: # %for.body -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 -; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02] -; CHECK64-NEXT: je .LBB3_9 # encoding: [0x74,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB3_1: # %for.cond +; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK64-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0] +; CHECK64-NEXT: je .LBB3_12 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1 ; CHECK64-NEXT: # %bb.2: # %for.body -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 -; CHECK64-NEXT: cmpl $1, %ecx # encoding: [0x83,0xf9,0x01] -; CHECK64-NEXT: je .LBB3_7 # encoding: [0x74,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02] +; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 ; CHECK64-NEXT: # %bb.3: # %for.body -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; CHECK64-NEXT: cmpl $1, %ecx # encoding: [0x83,0xf9,0x01] +; CHECK64-NEXT: je .LBB3_8 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.4: # %for.body +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: testl %ecx, %ecx # encoding: [0x85,0xc9] -; CHECK64-NEXT: jne .LBB3_10 # encoding: [0x75,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 -; CHECK64-NEXT: # %bb.4: # %sw.bb -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: jne .LBB3_11 # encoding: [0x75,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.5: # %sw.bb +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: movzbl (%rdi), %edx # encoding: [0x0f,0xb6,0x17] ; CHECK64-NEXT: cmpl $43, %edx # encoding: [0x83,0xfa,0x2b] ; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1] -; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 -; CHECK64-NEXT: # %bb.5: # %sw.bb -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: je .LBB3_11 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.6: # %sw.bb +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: cmpb $45, %dl # encoding: [0x80,0xfa,0x2d] ; CHECK64-NEXT: movl %r8d, %ecx # encoding: [0x44,0x89,0xc1] -; CHECK64-NEXT: je .LBB3_10 # encoding: [0x74,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 -; CHECK64-NEXT: # %bb.6: # %if.else -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: je .LBB3_11 # encoding: [0x74,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 +; CHECK64-NEXT: # %bb.7: # %if.else +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: addl $-48, %edx # encoding: [0x83,0xc2,0xd0] ; CHECK64-NEXT: cmpl $10, %edx # encoding: [0x83,0xfa,0x0a] -; CHECK64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1 -; CHECK64-NEXT: .LBB3_7: # %sw.bb14 -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: jmp .LBB3_9 # encoding: [0xeb,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 +; CHECK64-NEXT: .LBB3_8: # %sw.bb14 +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f] ; CHECK64-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0] ; CHECK64-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a] -; CHECK64-NEXT: .LBB3_8: # %if.else -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: .LBB3_9: # %if.else +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: movl %r9d, %ecx # encoding: [0x44,0x89,0xc9] -; CHECK64-NEXT: jb .LBB3_10 # encoding: [0x72,A] -; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; CHECK64-NEXT: jb .LBB3_11 # encoding: [0x72,A] +; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 ; CHECK64-NEXT: jmp .LBB3_13 # encoding: [0xeb,A] ; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_13-1, kind: FK_PCRel_1 -; CHECK64-NEXT: .LBB3_9: # %sw.bb22 -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: .LBB3_10: # %sw.bb22 +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: movzbl (%rdi), %ecx # encoding: [0x0f,0xb6,0x0f] ; CHECK64-NEXT: addl $-48, %ecx # encoding: [0x83,0xc1,0xd0] ; CHECK64-NEXT: cmpl $10, %ecx # encoding: [0x83,0xf9,0x0a] @@ -426,16 +429,13 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe ; CHECK64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL ; CHECK64-NEXT: # encoding: [0x73,A] ; CHECK64-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1 -; CHECK64-NEXT: .LBB3_10: # %for.inc -; CHECK64-NEXT: # in Loop: Header=BB3_11 Depth=1 +; CHECK64-NEXT: .LBB3_11: # %for.inc +; CHECK64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; CHECK64-NEXT: incq %rdi # encoding: [0x48,0xff,0xc7] ; CHECK64-NEXT: decq %rax # encoding: [0x48,0xff,0xc8] -; CHECK64-NEXT: .LBB3_11: # %for.cond -; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK64-NEXT: testq %rax, %rax # encoding: [0x48,0x85,0xc0] -; CHECK64-NEXT: jne .LBB3_1 # encoding: [0x75,A] +; CHECK64-NEXT: jmp .LBB3_1 # encoding: [0xeb,A] ; CHECK64-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 -; CHECK64-NEXT: # %bb.12: +; CHECK64-NEXT: .LBB3_12: ; CHECK64-NEXT: cmpl $2, %ecx # encoding: [0x83,0xf9,0x02] ; CHECK64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] ; CHECK64-NEXT: # kill: def $al killed $al killed $eax @@ -451,51 +451,54 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe ; WIN64-NEXT: movq -24(%rcx), %r8 # encoding: [0x4c,0x8b,0x41,0xe8] ; WIN64-NEXT: leaq (%rcx,%r8), %rdx # encoding: [0x4a,0x8d,0x14,0x01] ; WIN64-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] -; WIN64-NEXT: jmp .LBB3_10 # encoding: [0xeb,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 -; WIN64-NEXT: .LBB3_1: # %for.body -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 -; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02] -; WIN64-NEXT: je .LBB3_8 # encoding: [0x74,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB3_1: # %for.cond +; WIN64-NEXT: # =>This Inner Loop Header: Depth=1 +; WIN64-NEXT: testq %r8, %r8 # encoding: [0x4d,0x85,0xc0] +; WIN64-NEXT: je .LBB3_11 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_11-1, kind: FK_PCRel_1 ; WIN64-NEXT: # %bb.2: # %for.body -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 -; WIN64-NEXT: cmpl $1, %eax # encoding: [0x83,0xf8,0x01] -; WIN64-NEXT: je .LBB3_6 # encoding: [0x74,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_6-1, kind: FK_PCRel_1 +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02] +; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 ; WIN64-NEXT: # %bb.3: # %for.body -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 +; WIN64-NEXT: cmpl $1, %eax # encoding: [0x83,0xf8,0x01] +; WIN64-NEXT: je .LBB3_7 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.4: # %for.body +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: testl %eax, %eax # encoding: [0x85,0xc0] -; WIN64-NEXT: jne .LBB3_9 # encoding: [0x75,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 -; WIN64-NEXT: # %bb.4: # %sw.bb -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: jne .LBB3_10 # encoding: [0x75,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.5: # %sw.bb +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09] ; WIN64-NEXT: cmpl $43, %r9d # encoding: [0x41,0x83,0xf9,0x2b] ; WIN64-NEXT: movl $1, %eax # encoding: [0xb8,0x01,0x00,0x00,0x00] -; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 -; WIN64-NEXT: # %bb.5: # %sw.bb -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: je .LBB3_10 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; WIN64-NEXT: # %bb.6: # %sw.bb +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: cmpb $45, %r9b # encoding: [0x41,0x80,0xf9,0x2d] -; WIN64-NEXT: je .LBB3_9 # encoding: [0x74,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 -; WIN64-NEXT: jmp .LBB3_7 # encoding: [0xeb,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_7-1, kind: FK_PCRel_1 -; WIN64-NEXT: .LBB3_6: # %sw.bb14 -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: je .LBB3_10 # encoding: [0x74,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 +; WIN64-NEXT: jmp .LBB3_8 # encoding: [0xeb,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_8-1, kind: FK_PCRel_1 +; WIN64-NEXT: .LBB3_7: # %sw.bb14 +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09] -; WIN64-NEXT: .LBB3_7: # %if.else -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: .LBB3_8: # %if.else +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0] ; WIN64-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00] ; WIN64-NEXT: cmpl $10, %r9d # encoding: [0x41,0x83,0xf9,0x0a] -; WIN64-NEXT: jb .LBB3_9 # encoding: [0x72,A] -; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_9-1, kind: FK_PCRel_1 +; WIN64-NEXT: jb .LBB3_10 # encoding: [0x72,A] +; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_10-1, kind: FK_PCRel_1 ; WIN64-NEXT: jmp .LBB3_12 # encoding: [0xeb,A] ; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_12-1, kind: FK_PCRel_1 -; WIN64-NEXT: .LBB3_8: # %sw.bb22 -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: .LBB3_9: # %sw.bb22 +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: movzbl (%rcx), %r9d # encoding: [0x44,0x0f,0xb6,0x09] ; WIN64-NEXT: addl $-48, %r9d # encoding: [0x41,0x83,0xc1,0xd0] ; WIN64-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00] @@ -503,16 +506,13 @@ define zeroext i1 @pr31257(%"class.std::basic_string"* nocapture readonly derefe ; WIN64-NEXT: jae _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_ # TAILCALL ; WIN64-NEXT: # encoding: [0x73,A] ; WIN64-NEXT: # fixup A - offset: 1, value: _Z20isValidIntegerSuffixN9__gnu_cxx17__normal_iteratorIPKcSsEES3_-1, kind: FK_PCRel_1 -; WIN64-NEXT: .LBB3_9: # %for.inc -; WIN64-NEXT: # in Loop: Header=BB3_10 Depth=1 +; WIN64-NEXT: .LBB3_10: # %for.inc +; WIN64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; WIN64-NEXT: incq %rcx # encoding: [0x48,0xff,0xc1] ; WIN64-NEXT: decq %r8 # encoding: [0x49,0xff,0xc8] -; WIN64-NEXT: .LBB3_10: # %for.cond -; WIN64-NEXT: # =>This Inner Loop Header: Depth=1 -; WIN64-NEXT: testq %r8, %r8 # encoding: [0x4d,0x85,0xc0] -; WIN64-NEXT: jne .LBB3_1 # encoding: [0x75,A] +; WIN64-NEXT: jmp .LBB3_1 # encoding: [0xeb,A] ; WIN64-NEXT: # fixup A - offset: 1, value: .LBB3_1-1, kind: FK_PCRel_1 -; WIN64-NEXT: # %bb.11: +; WIN64-NEXT: .LBB3_11: ; WIN64-NEXT: cmpl $2, %eax # encoding: [0x83,0xf8,0x02] ; WIN64-NEXT: sete %al # encoding: [0x0f,0x94,0xc0] ; WIN64-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/loop-blocks.ll b/llvm/test/CodeGen/X86/loop-blocks.ll index f39c8a8eab9..a5e806d936b 100644 --- a/llvm/test/CodeGen/X86/loop-blocks.ll +++ b/llvm/test/CodeGen/X86/loop-blocks.ll @@ -7,12 +7,14 @@ ; order to avoid a branch within the loop. ; CHECK-LABEL: simple: -; CHECK: jmp .LBB0_1 -; CHECK-NEXT: align -; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: callq loop_latch +; CHECK: align ; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: callq loop_header +; CHECK: js .LBB0_3 +; CHECK-NEXT: callq loop_latch +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: +; CHECK-NEXT: callq exit define void @simple() nounwind { entry: @@ -75,17 +77,21 @@ exit: ; CHECK-LABEL: yet_more_involved: ; CHECK: jmp .LBB2_1 ; CHECK-NEXT: align -; CHECK-NEXT: .LBB2_5: -; CHECK-NEXT: callq block_a_true_func -; CHECK-NEXT: callq block_a_merge_func -; CHECK-NEXT: .LBB2_1: + +; CHECK: .LBB2_1: ; CHECK-NEXT: callq body -; -; LBB2_4 -; CHECK: callq bar99 +; CHECK-NEXT: callq get +; CHECK-NEXT: cmpl $2, %eax +; CHECK-NEXT: jge .LBB2_2 +; CHECK-NEXT: callq bar99 ; CHECK-NEXT: callq get ; CHECK-NEXT: cmpl $2999, %eax -; CHECK-NEXT: jle .LBB2_5 +; CHECK-NEXT: jg .LBB2_6 +; CHECK-NEXT: callq block_a_true_func +; CHECK-NEXT: callq block_a_merge_func +; CHECK-NEXT: jmp .LBB2_1 +; CHECK-NEXT: align +; CHECK-NEXT: .LBB2_6: ; CHECK-NEXT: callq block_a_false_func ; CHECK-NEXT: callq block_a_merge_func ; CHECK-NEXT: jmp .LBB2_1 @@ -201,12 +207,12 @@ block102: } ; CHECK-LABEL: check_minsize: -; CHECK: jmp .LBB4_1 ; CHECK-NOT: align -; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: callq loop_latch -; CHECK-NEXT: .LBB4_1: +; CHECK: .LBB4_1: ; CHECK-NEXT: callq loop_header +; CHECK: callq loop_latch +; CHECK: .LBB4_3: +; CHECK: callq exit define void @check_minsize() minsize nounwind { diff --git a/llvm/test/CodeGen/X86/loop-rotate.ll b/llvm/test/CodeGen/X86/loop-rotate.ll new file mode 100644 index 00000000000..3f0a390e7c1 --- /dev/null +++ b/llvm/test/CodeGen/X86/loop-rotate.ll @@ -0,0 +1,120 @@ +; RUN: llc -mtriple=i686-linux < %s | FileCheck %s + +; Don't rotate the loop if the number of fall through to exit is not larger +; than the number of fall through to header. +define void @no_rotate() { +; CHECK-LABEL: no_rotate +; CHECK: %entry +; CHECK: %header +; CHECK: %middle +; CHECK: %latch1 +; CHECK: %latch2 +; CHECK: %end +entry: + br label %header + +header: + %val1 = call i1 @foo() + br i1 %val1, label %middle, label %end + +middle: + %val2 = call i1 @foo() + br i1 %val2, label %latch1, label %end + +latch1: + %val3 = call i1 @foo() + br i1 %val3, label %latch2, label %header + +latch2: + %val4 = call i1 @foo() + br label %header + +end: + ret void +} + +define void @do_rotate() { +; CHECK-LABEL: do_rotate +; CHECK: %entry +; CHECK: %then +; CHECK: %else +; CHECK: %latch1 +; CHECK: %latch2 +; CHECK: %header +; CHECK: %end +entry: + %val0 = call i1 @foo() + br i1 %val0, label %then, label %else + +then: + call void @a() + br label %header + +else: + call void @b() + br label %header + +header: + %val1 = call i1 @foo() + br i1 %val1, label %latch1, label %end + +latch1: + %val3 = call i1 @foo() + br i1 %val3, label %latch2, label %header + +latch2: + %val4 = call i1 @foo() + br label %header + +end: + ret void +} + +; The loop structure is same as in @no_rotate, but the loop header's predecessor +; doesn't fall through to it, so it should be rotated to get exit fall through. +define void @do_rotate2() { +; CHECK-LABEL: do_rotate2 +; CHECK: %entry +; CHECK: %then +; CHECK: %middle +; CHECK: %latch1 +; CHECK: %latch2 +; CHECK: %header +; CHECK: %exit +entry: + %val0 = call i1 @foo() + br i1 %val0, label %then, label %header, !prof !1 + +then: + call void @a() + br label %end + +header: + %val1 = call i1 @foo() + br i1 %val1, label %middle, label %exit + +middle: + %val2 = call i1 @foo() + br i1 %val2, label %latch1, label %exit + +latch1: + %val3 = call i1 @foo() + br i1 %val3, label %latch2, label %header + +latch2: + %val4 = call i1 @foo() + br label %header + +exit: + call void @b() + br label %end + +end: + ret void +} + +declare i1 @foo() +declare void @a() +declare void @b() + +!1 = !{!"branch_weights", i32 10, i32 1} diff --git a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll index d3e758e7c74..d551ed9a093 100644 --- a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -21,22 +21,7 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r ; GENERIC-NEXT: movq _Te1@{{.*}}(%rip), %r8 ; GENERIC-NEXT: movq _Te3@{{.*}}(%rip), %r10 ; GENERIC-NEXT: movq %rcx, %r11 -; GENERIC-NEXT: jmp LBB0_1 ; GENERIC-NEXT: .p2align 4, 0x90 -; GENERIC-NEXT: LBB0_2: ## %bb1 -; GENERIC-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; GENERIC-NEXT: movl %edi, %ebx -; GENERIC-NEXT: shrl $16, %ebx -; GENERIC-NEXT: movzbl %bl, %ebx -; GENERIC-NEXT: xorl (%r8,%rbx,4), %eax -; GENERIC-NEXT: xorl -4(%r14), %eax -; GENERIC-NEXT: shrl $24, %edi -; GENERIC-NEXT: movzbl %bpl, %ebx -; GENERIC-NEXT: movl (%r10,%rbx,4), %ebx -; GENERIC-NEXT: xorl (%r9,%rdi,4), %ebx -; GENERIC-NEXT: xorl (%r14), %ebx -; GENERIC-NEXT: decq %r11 -; GENERIC-NEXT: addq $16, %r14 ; GENERIC-NEXT: LBB0_1: ## %bb ; GENERIC-NEXT: ## =>This Inner Loop Header: Depth=1 ; GENERIC-NEXT: movzbl %al, %edi @@ -56,8 +41,23 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r ; GENERIC-NEXT: shrl $24, %eax ; GENERIC-NEXT: movl (%r9,%rax,4), %eax ; GENERIC-NEXT: testq %r11, %r11 -; GENERIC-NEXT: jne LBB0_2 -; GENERIC-NEXT: ## %bb.3: ## %bb2 +; GENERIC-NEXT: je LBB0_3 +; GENERIC-NEXT: ## %bb.2: ## %bb1 +; GENERIC-NEXT: ## in Loop: Header=BB0_1 Depth=1 +; GENERIC-NEXT: movl %edi, %ebx +; GENERIC-NEXT: shrl $16, %ebx +; GENERIC-NEXT: movzbl %bl, %ebx +; GENERIC-NEXT: xorl (%r8,%rbx,4), %eax +; GENERIC-NEXT: xorl -4(%r14), %eax +; GENERIC-NEXT: shrl $24, %edi +; GENERIC-NEXT: movzbl %bpl, %ebx +; GENERIC-NEXT: movl (%r10,%rbx,4), %ebx +; GENERIC-NEXT: xorl (%r9,%rdi,4), %ebx +; GENERIC-NEXT: xorl (%r14), %ebx +; GENERIC-NEXT: decq %r11 +; GENERIC-NEXT: addq $16, %r14 +; GENERIC-NEXT: jmp LBB0_1 +; GENERIC-NEXT: LBB0_3: ## %bb2 ; GENERIC-NEXT: shlq $4, %rcx ; GENERIC-NEXT: andl $-16777216, %eax ## imm = 0xFF000000 ; GENERIC-NEXT: movl %edi, %ebx @@ -105,21 +105,7 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r ; ATOM-NEXT: movq _Te3@{{.*}}(%rip), %r10 ; ATOM-NEXT: decl %ecx ; ATOM-NEXT: movq %rcx, %r11 -; ATOM-NEXT: jmp LBB0_1 ; ATOM-NEXT: .p2align 4, 0x90 -; ATOM-NEXT: LBB0_2: ## %bb1 -; ATOM-NEXT: ## in Loop: Header=BB0_1 Depth=1 -; ATOM-NEXT: shrl $16, %eax -; ATOM-NEXT: shrl $24, %edi -; ATOM-NEXT: decq %r11 -; ATOM-NEXT: movzbl %al, %ebp -; ATOM-NEXT: movzbl %bl, %eax -; ATOM-NEXT: movl (%r10,%rax,4), %eax -; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d -; ATOM-NEXT: xorl (%r9,%rdi,4), %eax -; ATOM-NEXT: xorl -4(%r14), %r15d -; ATOM-NEXT: xorl (%r14), %eax -; ATOM-NEXT: addq $16, %r14 ; ATOM-NEXT: LBB0_1: ## %bb ; ATOM-NEXT: ## =>This Inner Loop Header: Depth=1 ; ATOM-NEXT: movl %eax, %edi @@ -140,8 +126,22 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r ; ATOM-NEXT: movl (%r9,%rax,4), %r15d ; ATOM-NEXT: testq %r11, %r11 ; ATOM-NEXT: movl %edi, %eax -; ATOM-NEXT: jne LBB0_2 -; ATOM-NEXT: ## %bb.3: ## %bb2 +; ATOM-NEXT: je LBB0_3 +; ATOM-NEXT: ## %bb.2: ## %bb1 +; ATOM-NEXT: ## in Loop: Header=BB0_1 Depth=1 +; ATOM-NEXT: shrl $16, %eax +; ATOM-NEXT: shrl $24, %edi +; ATOM-NEXT: decq %r11 +; ATOM-NEXT: movzbl %al, %ebp +; ATOM-NEXT: movzbl %bl, %eax +; ATOM-NEXT: movl (%r10,%rax,4), %eax +; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d +; ATOM-NEXT: xorl (%r9,%rdi,4), %eax +; ATOM-NEXT: xorl -4(%r14), %r15d +; ATOM-NEXT: xorl (%r14), %eax +; ATOM-NEXT: addq $16, %r14 +; ATOM-NEXT: jmp LBB0_1 +; ATOM-NEXT: LBB0_3: ## %bb2 ; ATOM-NEXT: shrl $16, %eax ; ATOM-NEXT: shrl $8, %edi ; ATOM-NEXT: movzbl %bl, %ebp diff --git a/llvm/test/CodeGen/X86/move_latch_to_loop_top.ll b/llvm/test/CodeGen/X86/move_latch_to_loop_top.ll index 718ec1efa3b..d86ec9c8129 100644 --- a/llvm/test/CodeGen/X86/move_latch_to_loop_top.ll +++ b/llvm/test/CodeGen/X86/move_latch_to_loop_top.ll @@ -1,11 +1,11 @@ -; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux --force-precise-rotation-cost < %s | FileCheck %s +; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s ; The block latch should be moved before header. ;CHECK-LABEL: test1: ;CHECK: %latch ;CHECK: %header ;CHECK: %false -define i32 @test1(i32* %p) !prof !0 { +define i32 @test1(i32* %p) { entry: br label %header @@ -39,7 +39,7 @@ exit: ;CHECK: %latch ;CHECK: %header ;CHECK: %false -define i32 @test2(i32* %p) !prof !0 { +define i32 @test2(i32* %p) { entry: br label %header @@ -107,7 +107,7 @@ exit: ;CHECK: %latch ;CHECK: %header ;CHECK: %false -define i32 @test3(i32* %p) !prof !0 { +define i32 @test3(i32* %p) { entry: br label %header @@ -173,9 +173,9 @@ exit: ;CHECK: %header ;CHECK: %true ;CHECK: %latch -;CHECK: %exit ;CHECK: %false -define i32 @test4(i32 %t, i32* %p) !prof !0 { +;CHECK: %exit +define i32 @test4(i32 %t, i32* %p) { entry: br label %header @@ -207,7 +207,6 @@ exit: ret i32 %count4 } -!0 = !{!"function_entry_count", i32 1000} !1 = !{!"branch_weights", i32 100, i32 1} !2 = !{!"branch_weights", i32 16, i32 16} !3 = !{!"branch_weights", i32 51, i32 49} @@ -217,7 +216,7 @@ exit: ;CHECK: %entry ;CHECK: %header ;CHECK: %latch -define void @test5(i32* %p) !prof !0 { +define void @test5(i32* %p) { entry: br label %header @@ -237,3 +236,4 @@ latch: exit: ret void } + diff --git a/llvm/test/CodeGen/X86/pr38185.ll b/llvm/test/CodeGen/X86/pr38185.ll index 778fb55b419..3a917f924c1 100644 --- a/llvm/test/CodeGen/X86/pr38185.ll +++ b/llvm/test/CodeGen/X86/pr38185.ll @@ -5,9 +5,13 @@ define void @foo(i32* %a, i32* %b, i32* noalias %c, i64 %s) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: movq $0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %body +; CHECK-NEXT: .LBB0_1: # %loop +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %r9 +; CHECK-NEXT: cmpq %rcx, %r9 +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # %bb.2: # %body ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $1, (%rdx,%r9,4) ; CHECK-NEXT: movzbl (%rdi,%r9,4), %r8d @@ -17,12 +21,8 @@ define void @foo(i32* %a, i32* %b, i32* noalias %c, i64 %s) { ; CHECK-NEXT: movl %eax, (%rdi,%r9,4) ; CHECK-NEXT: incq %r9 ; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: .LBB0_1: # %loop -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %r9 -; CHECK-NEXT: cmpq %rcx, %r9 -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.3: # %endloop +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %endloop ; CHECK-NEXT: retq %i = alloca i64 store i64 0, i64* %i diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll index fd3d83ed2cb..9238ab0bf89 100644 --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -103,6 +103,34 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: xorl %r13d, %r13d ; CHECK-NEXT: jmp LBB0_13 ; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: LBB0_20: ## %sw.bb256 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: decl %r15d +; CHECK-NEXT: testl %r15d, %r15d +; CHECK-NEXT: movl %r13d, %r14d +; CHECK-NEXT: jle LBB0_22 +; CHECK-NEXT: LBB0_13: ## %while.body200 +; CHECK-NEXT: ## =>This Loop Header: Depth=1 +; CHECK-NEXT: ## Child Loop BB0_30 Depth 2 +; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 +; CHECK-NEXT: leal -268(%r14), %eax +; CHECK-NEXT: cmpl $105, %eax +; CHECK-NEXT: ja LBB0_14 +; CHECK-NEXT: ## %bb.56: ## %while.body200 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: movslq (%rdi,%rax,4), %rax +; CHECK-NEXT: addq %rdi, %rax +; CHECK-NEXT: jmpq *%rax +; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: movl %r14d, %r13d +; CHECK-NEXT: jne LBB0_21 +; CHECK-NEXT: jmp LBB0_55 +; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_14: ## %while.body200 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: leal 1(%r14), %eax @@ -118,12 +146,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: movl $1, %r13d ; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_44: ## %while.cond1037.preheader -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: movl %r14d, %r13d -; CHECK-NEXT: jne LBB0_21 -; CHECK-NEXT: jmp LBB0_55 ; CHECK-NEXT: LBB0_26: ## %sw.bb474 ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: testb %dl, %dl @@ -137,30 +159,52 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: ## %bb.28: ## %land.rhs485.preheader ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: ## implicit-def: $rax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jns LBB0_30 +; CHECK-NEXT: jmp LBB0_55 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_29: ## %land.rhs485 -; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 -; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 +; CHECK-NEXT: LBB0_32: ## %do.body479.backedge +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 +; CHECK-NEXT: leaq 1(%r12), %rax +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: je LBB0_33 +; CHECK-NEXT: ## %bb.29: ## %land.rhs485 +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: js LBB0_55 -; CHECK-NEXT: ## %bb.30: ## %cond.true.i.i2780 -; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 +; CHECK-NEXT: LBB0_30: ## %cond.true.i.i2780 +; CHECK-NEXT: ## Parent Loop BB0_13 Depth=1 +; CHECK-NEXT: ## => This Inner Loop Header: Depth=2 ; CHECK-NEXT: movq %rax, %r12 ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: jne LBB0_32 ; CHECK-NEXT: ## %bb.31: ## %lor.rhs500 -; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 +; CHECK-NEXT: ## in Loop: Header=BB0_30 Depth=2 ; CHECK-NEXT: movl $256, %esi ## imm = 0x100 ; CHECK-NEXT: callq ___maskrune ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_34 -; CHECK-NEXT: LBB0_32: ## %do.body479.backedge -; CHECK-NEXT: ## in Loop: Header=BB0_29 Depth=2 -; CHECK-NEXT: leaq 1(%r12), %rax -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: jne LBB0_29 -; CHECK-NEXT: ## %bb.33: ## %if.end517.loopexitsplit +; CHECK-NEXT: jne LBB0_32 +; CHECK-NEXT: jmp LBB0_34 +; CHECK-NEXT: LBB0_45: ## %sw.bb1134 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: cmpq %rax, %rcx +; CHECK-NEXT: jb LBB0_55 +; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: xorl %ebp, %ebp +; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C +; CHECK-NEXT: jmp LBB0_21 +; CHECK-NEXT: LBB0_19: ## %sw.bb243 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: movl $2, %r13d +; CHECK-NEXT: jmp LBB0_21 +; CHECK-NEXT: LBB0_40: ## %sw.bb566 +; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 +; CHECK-NEXT: movl $20, %r13d +; CHECK-NEXT: jmp LBB0_21 +; CHECK-NEXT: LBB0_33: ## %if.end517.loopexitsplit ; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: incq %r12 ; CHECK-NEXT: LBB0_34: ## %if.end517 @@ -199,47 +243,6 @@ define i8* @SyFgets(i8* %line, i64 %length, i64 %fid) { ; CHECK-NEXT: leaq {{.*}}(%rip), %rsi ; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_45: ## %sw.bb1134 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx -; CHECK-NEXT: cmpq %rax, %rcx -; CHECK-NEXT: jb LBB0_55 -; CHECK-NEXT: ## %bb.46: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: movl $268, %r13d ## imm = 0x10C -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_19: ## %sw.bb243 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $2, %r13d -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: LBB0_40: ## %sw.bb566 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl $20, %r13d -; CHECK-NEXT: jmp LBB0_21 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_13: ## %while.body200 -; CHECK-NEXT: ## =>This Loop Header: Depth=1 -; CHECK-NEXT: ## Child Loop BB0_29 Depth 2 -; CHECK-NEXT: ## Child Loop BB0_38 Depth 2 -; CHECK-NEXT: leal -268(%r14), %eax -; CHECK-NEXT: cmpl $105, %eax -; CHECK-NEXT: ja LBB0_14 -; CHECK-NEXT: ## %bb.56: ## %while.body200 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movslq (%rdi,%rax,4), %rax -; CHECK-NEXT: addq %rdi, %rax -; CHECK-NEXT: jmpq *%rax -; CHECK-NEXT: LBB0_20: ## %sw.bb256 -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: movl %r14d, %r13d -; CHECK-NEXT: LBB0_21: ## %while.cond197.backedge -; CHECK-NEXT: ## in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: decl %r15d -; CHECK-NEXT: testl %r15d, %r15d -; CHECK-NEXT: movl %r13d, %r14d -; CHECK-NEXT: jg LBB0_13 -; CHECK-NEXT: jmp LBB0_22 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_42: ## %while.cond864 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/X86/reverse_branches.ll b/llvm/test/CodeGen/X86/reverse_branches.ll index 9f51a6313cc..fabde167949 100644 --- a/llvm/test/CodeGen/X86/reverse_branches.ll +++ b/llvm/test/CodeGen/X86/reverse_branches.ll @@ -85,25 +85,36 @@ define i32 @test_branches_order() uwtable ssp { ; CHECK-NEXT: jg LBB0_16 ; CHECK-NEXT: LBB0_9: ## %for.cond18.preheader ; CHECK-NEXT: ## =>This Loop Header: Depth=1 -; CHECK-NEXT: ## Child Loop BB0_10 Depth 2 +; CHECK-NEXT: ## Child Loop BB0_11 Depth 2 ; CHECK-NEXT: ## Child Loop BB0_12 Depth 3 ; CHECK-NEXT: movq %rcx, %rdx ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7 +; CHECK-NEXT: jle LBB0_11 +; CHECK-NEXT: jmp LBB0_15 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_10: ## %for.cond18 +; CHECK-NEXT: LBB0_14: ## %exit +; CHECK-NEXT: ## in Loop: Header=BB0_11 Depth=2 +; CHECK-NEXT: addq %rsi, %rbp +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: decq %rsi +; CHECK-NEXT: addq $1001, %rdx ## imm = 0x3E9 +; CHECK-NEXT: cmpq $-1000, %rbp ## imm = 0xFC18 +; CHECK-NEXT: jne LBB0_5 +; CHECK-NEXT: ## %bb.10: ## %for.cond18 +; CHECK-NEXT: ## in Loop: Header=BB0_11 Depth=2 +; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7 +; CHECK-NEXT: jg LBB0_15 +; CHECK-NEXT: LBB0_11: ## %for.body20 ; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1 ; CHECK-NEXT: ## => This Loop Header: Depth=2 ; CHECK-NEXT: ## Child Loop BB0_12 Depth 3 -; CHECK-NEXT: cmpl $999, %edi ## imm = 0x3E7 -; CHECK-NEXT: jg LBB0_15 -; CHECK-NEXT: ## %bb.11: ## %for.body20 -; CHECK-NEXT: ## in Loop: Header=BB0_10 Depth=2 ; CHECK-NEXT: movq $-1000, %rbp ## imm = 0xFC18 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_12: ## %do.body.i ; CHECK-NEXT: ## Parent Loop BB0_9 Depth=1 -; CHECK-NEXT: ## Parent Loop BB0_10 Depth=2 +; CHECK-NEXT: ## Parent Loop BB0_11 Depth=2 ; CHECK-NEXT: ## => This Inner Loop Header: Depth=3 ; CHECK-NEXT: cmpb $120, 1000(%rdx,%rbp) ; CHECK-NEXT: je LBB0_14 @@ -111,16 +122,6 @@ define i32 @test_branches_order() uwtable ssp { ; CHECK-NEXT: ## in Loop: Header=BB0_12 Depth=3 ; CHECK-NEXT: incq %rbp ; CHECK-NEXT: jne LBB0_12 -; CHECK-NEXT: jmp LBB0_5 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB0_14: ## %exit -; CHECK-NEXT: ## in Loop: Header=BB0_10 Depth=2 -; CHECK-NEXT: addq %rsi, %rbp -; CHECK-NEXT: incq %rdi -; CHECK-NEXT: decq %rsi -; CHECK-NEXT: addq $1001, %rdx ## imm = 0x3E9 -; CHECK-NEXT: cmpq $-1000, %rbp ## imm = 0xFC18 -; CHECK-NEXT: je LBB0_10 ; CHECK-NEXT: LBB0_5: ## %if.then ; CHECK-NEXT: leaq {{.*}}(%rip), %rdi ; CHECK-NEXT: callq _puts diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening.ll b/llvm/test/CodeGen/X86/speculative-load-hardening.ll index 158243ad972..934581e137f 100644 --- a/llvm/test/CodeGen/X86/speculative-load-hardening.ll +++ b/llvm/test/CodeGen/X86/speculative-load-hardening.ll @@ -215,10 +215,7 @@ define void @test_basic_loop(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) nounwind sp ; X64-NEXT: movl %esi, %ebp ; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: xorl %ebx, %ebx -; X64-NEXT: jmp .LBB2_3 ; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB2_6: # in Loop: Header=BB2_3 Depth=1 -; X64-NEXT: cmovgeq %r15, %rax ; X64-NEXT: .LBB2_3: # %l.header ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movslq (%r12), %rcx @@ -237,8 +234,11 @@ define void @test_basic_loop(i32 %a, i32 %b, i32* %ptr1, i32* %ptr2) nounwind sp ; X64-NEXT: cmovneq %r15, %rax ; X64-NEXT: incl %ebx ; X64-NEXT: cmpl %ebp, %ebx -; X64-NEXT: jl .LBB2_6 -; X64-NEXT: # %bb.4: +; X64-NEXT: jge .LBB2_4 +; X64-NEXT: # %bb.6: # in Loop: Header=BB2_3 Depth=1 +; X64-NEXT: cmovgeq %r15, %rax +; X64-NEXT: jmp .LBB2_3 +; X64-NEXT: .LBB2_4: ; X64-NEXT: cmovlq %r15, %rax ; X64-NEXT: .LBB2_5: # %exit ; X64-NEXT: shlq $47, %rax @@ -328,20 +328,12 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-NEXT: xorl %r13d, %r13d ; X64-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: testl %r15d, %r15d -; X64-NEXT: jg .LBB3_5 -; X64-NEXT: jmp .LBB3_4 -; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB3_12: -; X64-NEXT: cmovgeq %rbp, %rax -; X64-NEXT: testl %r15d, %r15d ; X64-NEXT: jle .LBB3_4 +; X64-NEXT: .p2align 4, 0x90 ; X64-NEXT: .LBB3_5: # %l2.header.preheader ; X64-NEXT: cmovleq %rbp, %rax ; X64-NEXT: xorl %r15d, %r15d -; X64-NEXT: jmp .LBB3_6 ; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB3_11: # in Loop: Header=BB3_6 Depth=1 -; X64-NEXT: cmovgeq %rbp, %rax ; X64-NEXT: .LBB3_6: # %l2.header ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movslq (%rbx), %rcx @@ -360,8 +352,12 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: incl %r15d ; X64-NEXT: cmpl %r12d, %r15d -; X64-NEXT: jl .LBB3_11 -; X64-NEXT: # %bb.7: +; X64-NEXT: jge .LBB3_7 +; X64-NEXT: # %bb.11: # in Loop: Header=BB3_6 Depth=1 +; X64-NEXT: cmovgeq %rbp, %rax +; X64-NEXT: jmp .LBB3_6 +; X64-NEXT: .p2align 4, 0x90 +; X64-NEXT: .LBB3_7: ; X64-NEXT: cmovlq %rbp, %rax ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Reload ; X64-NEXT: jmp .LBB3_8 @@ -385,8 +381,13 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-NEXT: cmovneq %rbp, %rax ; X64-NEXT: incl %r13d ; X64-NEXT: cmpl %r15d, %r13d -; X64-NEXT: jl .LBB3_12 -; X64-NEXT: # %bb.9: +; X64-NEXT: jge .LBB3_9 +; X64-NEXT: # %bb.12: +; X64-NEXT: cmovgeq %rbp, %rax +; X64-NEXT: testl %r15d, %r15d +; X64-NEXT: jg .LBB3_5 +; X64-NEXT: jmp .LBB3_4 +; X64-NEXT: .LBB3_9: ; X64-NEXT: cmovlq %rbp, %rax ; X64-NEXT: .LBB3_10: # %exit ; X64-NEXT: shlq $47, %rax @@ -418,7 +419,17 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-LFENCE-NEXT: movl %esi, %r15d ; X64-LFENCE-NEXT: lfence ; X64-LFENCE-NEXT: xorl %r12d, %r12d +; X64-LFENCE-NEXT: jmp .LBB3_2 ; X64-LFENCE-NEXT: .p2align 4, 0x90 +; X64-LFENCE-NEXT: .LBB3_5: # %l1.latch +; X64-LFENCE-NEXT: # in Loop: Header=BB3_2 Depth=1 +; X64-LFENCE-NEXT: lfence +; X64-LFENCE-NEXT: movslq (%rbx), %rax +; X64-LFENCE-NEXT: movl (%r14,%rax,4), %edi +; X64-LFENCE-NEXT: callq sink +; X64-LFENCE-NEXT: incl %r12d +; X64-LFENCE-NEXT: cmpl %r15d, %r12d +; X64-LFENCE-NEXT: jge .LBB3_6 ; X64-LFENCE-NEXT: .LBB3_2: # %l1.header ; X64-LFENCE-NEXT: # =>This Loop Header: Depth=1 ; X64-LFENCE-NEXT: # Child Loop BB3_4 Depth 2 @@ -440,15 +451,7 @@ define void @test_basic_nested_loop(i32 %a, i32 %b, i32 %c, i32* %ptr1, i32* %pt ; X64-LFENCE-NEXT: incl %ebp ; X64-LFENCE-NEXT: cmpl %r13d, %ebp ; X64-LFENCE-NEXT: jl .LBB3_4 -; X64-LFENCE-NEXT: .LBB3_5: # %l1.latch -; X64-LFENCE-NEXT: # in Loop: Header=BB3_2 Depth=1 -; X64-LFENCE-NEXT: lfence -; X64-LFENCE-NEXT: movslq (%rbx), %rax -; X64-LFENCE-NEXT: movl (%r14,%rax,4), %edi -; X64-LFENCE-NEXT: callq sink -; X64-LFENCE-NEXT: incl %r12d -; X64-LFENCE-NEXT: cmpl %r15d, %r12d -; X64-LFENCE-NEXT: jl .LBB3_2 +; X64-LFENCE-NEXT: jmp .LBB3_5 ; X64-LFENCE-NEXT: .LBB3_6: # %exit ; X64-LFENCE-NEXT: lfence ; X64-LFENCE-NEXT: addq $8, %rsp diff --git a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll index 13f582b6c99..c11ffaba12a 100644 --- a/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll +++ b/llvm/test/CodeGen/X86/tail-dup-merge-loop-headers.ll @@ -12,14 +12,17 @@ define void @tail_dup_merge_loops(i32 %a, i8* %b, i8* %c) local_unnamed_addr #0 ; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: incq %rsi ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: jmp .LBB0_5 +; CHECK-NEXT: je .LBB0_5 ; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_2: # %inner_loop_top +; CHECK-NEXT: # =>This Loop Header: Depth=1 +; CHECK-NEXT: # Child Loop BB0_4 Depth 2 +; CHECK-NEXT: cmpb $0, (%rsi) +; CHECK-NEXT: js .LBB0_3 ; CHECK-NEXT: .LBB0_4: # %inner_loop_latch -; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: # Parent Loop BB0_2 Depth=1 +; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: addq $2, %rsi -; CHECK-NEXT: .LBB0_2: # %inner_loop_top -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmpb $0, (%rsi) ; CHECK-NEXT: jns .LBB0_4 ; CHECK-NEXT: jmp .LBB0_3 @@ -130,58 +133,58 @@ define i32 @loop_shared_header(i8* %exe, i32 %exesz, i32 %headsize, i32 %min, i3 ; CHECK-NEXT: testl %ebp, %ebp ; CHECK-NEXT: je .LBB1_18 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB1_8: # %shared_loop_header +; CHECK-NEXT: .LBB1_9: # %shared_loop_header ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testq %rbx, %rbx ; CHECK-NEXT: jne .LBB1_27 -; CHECK-NEXT: # %bb.9: # %inner_loop_body -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: # %bb.10: # %inner_loop_body +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jns .LBB1_8 -; CHECK-NEXT: # %bb.10: # %if.end96.i -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: jns .LBB1_9 +; CHECK-NEXT: # %bb.11: # %if.end96.i +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: cmpl $3, %ebp ; CHECK-NEXT: jae .LBB1_22 -; CHECK-NEXT: # %bb.11: # %if.end287.i -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: # %bb.12: # %if.end287.i +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: cmpl $1, %ebp ; CHECK-NEXT: setne %dl ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jne .LBB1_15 -; CHECK-NEXT: # %bb.12: # %if.end308.i -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: jne .LBB1_16 +; CHECK-NEXT: # %bb.13: # %if.end308.i +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: je .LBB1_17 -; CHECK-NEXT: # %bb.13: # %if.end335.i -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: je .LBB1_7 +; CHECK-NEXT: # %bb.14: # %if.end335.i +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: movl $0, %esi -; CHECK-NEXT: jne .LBB1_7 -; CHECK-NEXT: # %bb.14: # %merge_other -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: jne .LBB1_8 +; CHECK-NEXT: # %bb.15: # %merge_other +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: jmp .LBB1_16 -; CHECK-NEXT: .LBB1_15: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: jmp .LBB1_17 +; CHECK-NEXT: .LBB1_16: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: movb %dl, %sil ; CHECK-NEXT: addl $3, %esi -; CHECK-NEXT: .LBB1_16: # %outer_loop_latch -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: .LBB1_17: # %outer_loop_latch +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: # implicit-def: $dl -; CHECK-NEXT: jmp .LBB1_7 -; CHECK-NEXT: .LBB1_17: # %merge_predecessor_split -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: jmp .LBB1_8 +; CHECK-NEXT: .LBB1_7: # %merge_predecessor_split +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: movb $32, %dl ; CHECK-NEXT: xorl %esi, %esi -; CHECK-NEXT: .LBB1_7: # %outer_loop_latch -; CHECK-NEXT: # in Loop: Header=BB1_8 Depth=1 +; CHECK-NEXT: .LBB1_8: # %outer_loop_latch +; CHECK-NEXT: # in Loop: Header=BB1_9 Depth=1 ; CHECK-NEXT: movzwl %si, %esi ; CHECK-NEXT: decl %esi ; CHECK-NEXT: movzwl %si, %esi ; CHECK-NEXT: leaq 1(%rcx,%rsi), %rcx ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB1_8 +; CHECK-NEXT: jne .LBB1_9 ; CHECK-NEXT: .LBB1_18: # %while.cond.us1412.i ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al diff --git a/llvm/test/CodeGen/X86/tail-dup-repeat.ll b/llvm/test/CodeGen/X86/tail-dup-repeat.ll index 9a1867b8735..bfa1ee61145 100644 --- a/llvm/test/CodeGen/X86/tail-dup-repeat.ll +++ b/llvm/test/CodeGen/X86/tail-dup-repeat.ll @@ -10,35 +10,30 @@ define void @repeated_tail_dup(i1 %a1, i1 %a2, i32* %a4, i32* %a5, i8* %a6, i32 %a7) #0 align 2 { ; CHECK-LABEL: repeated_tail_dup: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_1: # %for.cond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: je .LBB0_3 -; CHECK-NEXT: # %bb.2: # %land.lhs.true -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl $10, (%rdx) -; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_2: # %land.lhs.true +; CHECK-NEXT: movl $10, (%rdx) +; CHECK-NEXT: .LBB0_6: # %dup2 +; CHECK-NEXT: movl $2, (%rcx) +; CHECK-NEXT: testl %r9d, %r9d +; CHECK-NEXT: jne .LBB0_8 +; CHECK-NEXT: .LBB0_1: # %for.cond +; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: .LBB0_3: # %if.end56 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb $1, %sil ; CHECK-NEXT: je .LBB0_5 ; CHECK-NEXT: # %bb.4: # %if.then64 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movb $1, (%r8) ; CHECK-NEXT: testl %r9d, %r9d ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: jmp .LBB0_8 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_5: # %if.end70 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $12, (%rdx) -; CHECK-NEXT: .LBB0_6: # %dup2 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: movl $2, (%rcx) -; CHECK-NEXT: testl %r9d, %r9d -; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .LBB0_8: # %for.end ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll b/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll index 60eec509637..4fc1f6023b8 100644 --- a/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll +++ b/llvm/test/CodeGen/X86/vector-shift-by-select-loop.ll @@ -115,8 +115,17 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; SSE-NEXT: jne .LBB0_4 ; SSE-NEXT: # %bb.5: # %middle.block ; SSE-NEXT: cmpq %rax, %rdx -; SSE-NEXT: je .LBB0_9 +; SSE-NEXT: jne .LBB0_6 +; SSE-NEXT: .LBB0_9: # %for.cond.cleanup +; SSE-NEXT: retq ; SSE-NEXT: .p2align 4, 0x90 +; SSE-NEXT: .LBB0_8: # %for.body +; SSE-NEXT: # in Loop: Header=BB0_6 Depth=1 +; SSE-NEXT: # kill: def $cl killed $cl killed $ecx +; SSE-NEXT: shll %cl, (%rdi,%rdx,4) +; SSE-NEXT: incq %rdx +; SSE-NEXT: cmpq %rdx, %rax +; SSE-NEXT: je .LBB0_9 ; SSE-NEXT: .LBB0_6: # %for.body ; SSE-NEXT: # =>This Inner Loop Header: Depth=1 ; SSE-NEXT: cmpb $0, (%rsi,%rdx) @@ -125,15 +134,7 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; SSE-NEXT: # %bb.7: # %for.body ; SSE-NEXT: # in Loop: Header=BB0_6 Depth=1 ; SSE-NEXT: movl %r8d, %ecx -; SSE-NEXT: .LBB0_8: # %for.body -; SSE-NEXT: # in Loop: Header=BB0_6 Depth=1 -; SSE-NEXT: # kill: def $cl killed $cl killed $ecx -; SSE-NEXT: shll %cl, (%rdi,%rdx,4) -; SSE-NEXT: incq %rdx -; SSE-NEXT: cmpq %rdx, %rax -; SSE-NEXT: jne .LBB0_6 -; SSE-NEXT: .LBB0_9: # %for.cond.cleanup -; SSE-NEXT: retq +; SSE-NEXT: jmp .LBB0_8 ; ; AVX1-LABEL: vector_variable_shift_left_loop: ; AVX1: # %bb.0: # %entry @@ -241,8 +242,19 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; AVX1-NEXT: jne .LBB0_4 ; AVX1-NEXT: # %bb.5: # %middle.block ; AVX1-NEXT: cmpq %rax, %rdx -; AVX1-NEXT: je .LBB0_9 +; AVX1-NEXT: jne .LBB0_6 +; AVX1-NEXT: .LBB0_9: # %for.cond.cleanup +; AVX1-NEXT: addq $24, %rsp +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq ; AVX1-NEXT: .p2align 4, 0x90 +; AVX1-NEXT: .LBB0_8: # %for.body +; AVX1-NEXT: # in Loop: Header=BB0_6 Depth=1 +; AVX1-NEXT: # kill: def $cl killed $cl killed $ecx +; AVX1-NEXT: shll %cl, (%rdi,%rdx,4) +; AVX1-NEXT: incq %rdx +; AVX1-NEXT: cmpq %rdx, %rax +; AVX1-NEXT: je .LBB0_9 ; AVX1-NEXT: .LBB0_6: # %for.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX1-NEXT: cmpb $0, (%rsi,%rdx) @@ -251,17 +263,7 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; AVX1-NEXT: # %bb.7: # %for.body ; AVX1-NEXT: # in Loop: Header=BB0_6 Depth=1 ; AVX1-NEXT: movl %r8d, %ecx -; AVX1-NEXT: .LBB0_8: # %for.body -; AVX1-NEXT: # in Loop: Header=BB0_6 Depth=1 -; AVX1-NEXT: # kill: def $cl killed $cl killed $ecx -; AVX1-NEXT: shll %cl, (%rdi,%rdx,4) -; AVX1-NEXT: incq %rdx -; AVX1-NEXT: cmpq %rdx, %rax -; AVX1-NEXT: jne .LBB0_6 -; AVX1-NEXT: .LBB0_9: # %for.cond.cleanup -; AVX1-NEXT: addq $24, %rsp -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq +; AVX1-NEXT: jmp .LBB0_8 ; ; AVX2-LABEL: vector_variable_shift_left_loop: ; AVX2: # %bb.0: # %entry @@ -316,8 +318,18 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; AVX2-NEXT: jne .LBB0_4 ; AVX2-NEXT: # %bb.5: # %middle.block ; AVX2-NEXT: cmpq %rax, %rdx -; AVX2-NEXT: je .LBB0_9 +; AVX2-NEXT: jne .LBB0_6 +; AVX2-NEXT: .LBB0_9: # %for.cond.cleanup +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq ; AVX2-NEXT: .p2align 4, 0x90 +; AVX2-NEXT: .LBB0_8: # %for.body +; AVX2-NEXT: # in Loop: Header=BB0_6 Depth=1 +; AVX2-NEXT: # kill: def $cl killed $cl killed $ecx +; AVX2-NEXT: shll %cl, (%rdi,%rdx,4) +; AVX2-NEXT: incq %rdx +; AVX2-NEXT: cmpq %rdx, %rax +; AVX2-NEXT: je .LBB0_9 ; AVX2-NEXT: .LBB0_6: # %for.body ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX2-NEXT: cmpb $0, (%rsi,%rdx) @@ -326,16 +338,7 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; AVX2-NEXT: # %bb.7: # %for.body ; AVX2-NEXT: # in Loop: Header=BB0_6 Depth=1 ; AVX2-NEXT: movl %r8d, %ecx -; AVX2-NEXT: .LBB0_8: # %for.body -; AVX2-NEXT: # in Loop: Header=BB0_6 Depth=1 -; AVX2-NEXT: # kill: def $cl killed $cl killed $ecx -; AVX2-NEXT: shll %cl, (%rdi,%rdx,4) -; AVX2-NEXT: incq %rdx -; AVX2-NEXT: cmpq %rdx, %rax -; AVX2-NEXT: jne .LBB0_6 -; AVX2-NEXT: .LBB0_9: # %for.cond.cleanup -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; AVX2-NEXT: jmp .LBB0_8 entry: %cmp12 = icmp sgt i32 %count, 0 br i1 %cmp12, label %for.body.preheader, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/X86/widen_arith-1.ll b/llvm/test/CodeGen/X86/widen_arith-1.ll index dd606bed523..a37dba14f22 100644 --- a/llvm/test/CodeGen/X86/widen_arith-1.ll +++ b/llvm/test/CodeGen/X86/widen_arith-1.ll @@ -7,9 +7,13 @@ define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind { ; CHECK-NEXT: pushl %eax ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %forbody +; CHECK-NEXT: .LBB0_1: # %forcond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl (%esp), %eax +; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: jge .LBB0_3 +; CHECK-NEXT: # %bb.2: # %forbody ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl (%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -19,12 +23,8 @@ define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind { ; CHECK-NEXT: pextrb $2, %xmm1, 2(%ecx,%eax,4) ; CHECK-NEXT: pextrw $0, %xmm1, (%ecx,%eax,4) ; CHECK-NEXT: incl (%esp) -; CHECK-NEXT: .LBB0_1: # %forcond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl (%esp), %eax -; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: jl .LBB0_2 -; CHECK-NEXT: # %bb.3: # %afterfor +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %afterfor ; CHECK-NEXT: popl %eax ; CHECK-NEXT: retl entry: diff --git a/llvm/test/CodeGen/X86/widen_arith-2.ll b/llvm/test/CodeGen/X86/widen_arith-2.ll index 7b1fcbb4f1b..cf76f66dad2 100644 --- a/llvm/test/CodeGen/X86/widen_arith-2.ll +++ b/llvm/test/CodeGen/X86/widen_arith-2.ll @@ -10,9 +10,13 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind { ; CHECK-NEXT: movl $0, (%esp) ; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = <4,4,4,4,4,4,4,4,u,u,u,u,u,u,u,u> -; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %forbody +; CHECK-NEXT: .LBB0_1: # %forcond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl (%esp), %eax +; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: jge .LBB0_3 +; CHECK-NEXT: # %bb.2: # %forbody ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl (%esp), %eax ; CHECK-NEXT: leal (,%eax,8), %ecx @@ -26,12 +30,8 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind { ; CHECK-NEXT: pand %xmm1, %xmm2 ; CHECK-NEXT: movq %xmm2, (%edx,%eax,8) ; CHECK-NEXT: incl (%esp) -; CHECK-NEXT: .LBB0_1: # %forcond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl (%esp), %eax -; CHECK-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: jl .LBB0_2 -; CHECK-NEXT: # %bb.3: # %afterfor +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %afterfor ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl entry: diff --git a/llvm/test/CodeGen/X86/widen_arith-3.ll b/llvm/test/CodeGen/X86/widen_arith-3.ll index ca86beda5c1..5b944beffde 100644 --- a/llvm/test/CodeGen/X86/widen_arith-3.ll +++ b/llvm/test/CodeGen/X86/widen_arith-3.ll @@ -17,9 +17,13 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind { ; CHECK-NEXT: movw $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $65537, {{[0-9]+}}(%esp) # imm = 0x10001 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %forbody +; CHECK-NEXT: .LBB0_1: # %forcond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpl 16(%ebp), %eax +; CHECK-NEXT: jge .LBB0_3 +; CHECK-NEXT: # %bb.2: # %forbody ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl 12(%ebp), %edx @@ -30,12 +34,8 @@ define void @update(<3 x i16>* %dst, <3 x i16>* %src, i32 %n) nounwind { ; CHECK-NEXT: pextrw $2, %xmm1, 4(%ecx,%eax,8) ; CHECK-NEXT: movd %xmm1, (%ecx,%eax,8) ; CHECK-NEXT: incl {{[0-9]+}}(%esp) -; CHECK-NEXT: .LBB0_1: # %forcond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: cmpl 16(%ebp), %eax -; CHECK-NEXT: jl .LBB0_2 -; CHECK-NEXT: # %bb.3: # %afterfor +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %afterfor ; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/widen_arith-4.ll b/llvm/test/CodeGen/X86/widen_arith-4.ll index f25e73ef2a0..490783ef657 100644 --- a/llvm/test/CodeGen/X86/widen_arith-4.ll +++ b/llvm/test/CodeGen/X86/widen_arith-4.ll @@ -16,9 +16,13 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind { ; SSE2-NEXT: movl $0, -{{[0-9]+}}(%rsp) ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = <271,271,271,271,271,u,u,u> ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <2,4,2,2,2,u,u,u> -; SSE2-NEXT: jmp .LBB0_1 ; SSE2-NEXT: .p2align 4, 0x90 -; SSE2-NEXT: .LBB0_2: # %forbody +; SSE2-NEXT: .LBB0_1: # %forcond +; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; SSE2-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax +; SSE2-NEXT: jge .LBB0_3 +; SSE2-NEXT: # %bb.2: # %forbody ; SSE2-NEXT: # in Loop: Header=BB0_1 Depth=1 ; SSE2-NEXT: movslq -{{[0-9]+}}(%rsp), %rax ; SSE2-NEXT: movq -{{[0-9]+}}(%rsp), %rcx @@ -31,12 +35,8 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind { ; SSE2-NEXT: pextrw $4, %xmm2, %edx ; SSE2-NEXT: movw %dx, 8(%rcx,%rax) ; SSE2-NEXT: incl -{{[0-9]+}}(%rsp) -; SSE2-NEXT: .LBB0_1: # %forcond -; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: jl .LBB0_2 -; SSE2-NEXT: # %bb.3: # %afterfor +; SSE2-NEXT: jmp .LBB0_1 +; SSE2-NEXT: .LBB0_3: # %afterfor ; SSE2-NEXT: retq ; ; SSE41-LABEL: update: @@ -49,9 +49,13 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind { ; SSE41-NEXT: movw $0, -{{[0-9]+}}(%rsp) ; SSE41-NEXT: movl $0, -{{[0-9]+}}(%rsp) ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = <271,271,271,271,271,u,u,u> -; SSE41-NEXT: jmp .LBB0_1 ; SSE41-NEXT: .p2align 4, 0x90 -; SSE41-NEXT: .LBB0_2: # %forbody +; SSE41-NEXT: .LBB0_1: # %forcond +; SSE41-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE41-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; SSE41-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax +; SSE41-NEXT: jge .LBB0_3 +; SSE41-NEXT: # %bb.2: # %forbody ; SSE41-NEXT: # in Loop: Header=BB0_1 Depth=1 ; SSE41-NEXT: movslq -{{[0-9]+}}(%rsp), %rax ; SSE41-NEXT: movq -{{[0-9]+}}(%rsp), %rcx @@ -66,12 +70,8 @@ define void @update(<5 x i16>* %dst, <5 x i16>* %src, i32 %n) nounwind { ; SSE41-NEXT: pextrw $4, %xmm1, 8(%rcx,%rax) ; SSE41-NEXT: movq %xmm2, (%rcx,%rax) ; SSE41-NEXT: incl -{{[0-9]+}}(%rsp) -; SSE41-NEXT: .LBB0_1: # %forcond -; SSE41-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE41-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; SSE41-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax -; SSE41-NEXT: jl .LBB0_2 -; SSE41-NEXT: # %bb.3: # %afterfor +; SSE41-NEXT: jmp .LBB0_1 +; SSE41-NEXT: .LBB0_3: # %afterfor ; SSE41-NEXT: retq entry: %dst.addr = alloca <5 x i16>* diff --git a/llvm/test/CodeGen/X86/widen_arith-5.ll b/llvm/test/CodeGen/X86/widen_arith-5.ll index 6e486bb2ace..2c705faed96 100644 --- a/llvm/test/CodeGen/X86/widen_arith-5.ll +++ b/llvm/test/CodeGen/X86/widen_arith-5.ll @@ -14,9 +14,13 @@ define void @update(<3 x i32>* %dst, <3 x i32>* %src, i32 %n) nounwind { ; CHECK-NEXT: movl $1, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movl $0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movdqa {{.*#+}} xmm0 = <3,3,3,u> -; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %forbody +; CHECK-NEXT: .LBB0_1: # %forcond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: jge .LBB0_3 +; CHECK-NEXT: # %bb.2: # %forbody ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx @@ -28,12 +32,8 @@ define void @update(<3 x i32>* %dst, <3 x i32>* %src, i32 %n) nounwind { ; CHECK-NEXT: pextrd $2, %xmm1, 8(%rcx,%rax) ; CHECK-NEXT: movq %xmm1, (%rcx,%rax) ; CHECK-NEXT: incl -{{[0-9]+}}(%rsp) -; CHECK-NEXT: .LBB0_1: # %forcond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax -; CHECK-NEXT: jl .LBB0_2 -; CHECK-NEXT: # %bb.3: # %afterfor +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %afterfor ; CHECK-NEXT: retq entry: %dst.addr = alloca <3 x i32>* diff --git a/llvm/test/CodeGen/X86/widen_arith-6.ll b/llvm/test/CodeGen/X86/widen_arith-6.ll index c039096604e..3b24cb0194e 100644 --- a/llvm/test/CodeGen/X86/widen_arith-6.ll +++ b/llvm/test/CodeGen/X86/widen_arith-6.ll @@ -15,9 +15,13 @@ define void @update(<3 x float>* %dst, <3 x float>* %src, i32 %n) nounwind { ; CHECK-NEXT: movl $1065353216, {{[0-9]+}}(%esp) # imm = 0x3F800000 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movaps {{.*#+}} xmm0 = <1.97604004E+3,1.97604004E+3,1.97604004E+3,u> -; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_2: # %forbody +; CHECK-NEXT: .LBB0_1: # %forcond +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: cmpl 16(%ebp), %eax +; CHECK-NEXT: jge .LBB0_3 +; CHECK-NEXT: # %bb.2: # %forbody ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl 8(%ebp), %ecx @@ -30,12 +34,8 @@ define void @update(<3 x float>* %dst, <3 x float>* %src, i32 %n) nounwind { ; CHECK-NEXT: extractps $1, %xmm1, 4(%ecx,%eax) ; CHECK-NEXT: movss %xmm1, (%ecx,%eax) ; CHECK-NEXT: incl {{[0-9]+}}(%esp) -; CHECK-NEXT: .LBB0_1: # %forcond -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: cmpl 16(%ebp), %eax -; CHECK-NEXT: jl .LBB0_2 -; CHECK-NEXT: # %bb.3: # %afterfor +; CHECK-NEXT: jmp .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %afterfor ; CHECK-NEXT: movl %ebp, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl diff --git a/llvm/test/CodeGen/X86/widen_cast-4.ll b/llvm/test/CodeGen/X86/widen_cast-4.ll index 9a2304ff467..f317d4b5913 100644 --- a/llvm/test/CodeGen/X86/widen_cast-4.ll +++ b/llvm/test/CodeGen/X86/widen_cast-4.ll @@ -11,9 +11,13 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind { ; WIDE-NEXT: pcmpeqd %xmm0, %xmm0 ; WIDE-NEXT: movdqa {{.*#+}} xmm1 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63] ; WIDE-NEXT: movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32] -; WIDE-NEXT: jmp .LBB0_1 ; WIDE-NEXT: .p2align 4, 0x90 -; WIDE-NEXT: .LBB0_2: # %forbody +; WIDE-NEXT: .LBB0_1: # %forcond +; WIDE-NEXT: # =>This Inner Loop Header: Depth=1 +; WIDE-NEXT: movl (%esp), %eax +; WIDE-NEXT: cmpl {{[0-9]+}}(%esp), %eax +; WIDE-NEXT: jge .LBB0_3 +; WIDE-NEXT: # %bb.2: # %forbody ; WIDE-NEXT: # in Loop: Header=BB0_1 Depth=1 ; WIDE-NEXT: movl (%esp), %eax ; WIDE-NEXT: leal (,%eax,8), %ecx @@ -30,12 +34,8 @@ define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind { ; WIDE-NEXT: psubb %xmm2, %xmm3 ; WIDE-NEXT: movq %xmm3, (%edx,%eax,8) ; WIDE-NEXT: incl (%esp) -; WIDE-NEXT: .LBB0_1: # %forcond -; WIDE-NEXT: # =>This Inner Loop Header: Depth=1 -; WIDE-NEXT: movl (%esp), %eax -; WIDE-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; WIDE-NEXT: jl .LBB0_2 -; WIDE-NEXT: # %bb.3: # %afterfor +; WIDE-NEXT: jmp .LBB0_1 +; WIDE-NEXT: .LBB0_3: # %afterfor ; WIDE-NEXT: addl $12, %esp ; WIDE-NEXT: retl entry: |