diff options
-rw-r--r-- | llvm/lib/CodeGen/BranchFolding.cpp | 32 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/tail-merge-after-mbp.ll | 92 |
3 files changed, 116 insertions, 10 deletions
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 1a783b61276..a2d304bb078 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -1005,6 +1005,24 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { MachineBasicBlock *IBB = &*I; MachineBasicBlock *PredBB = &*std::prev(I); MergePotentials.clear(); + MachineLoop *ML; + + // Bail if merging after placement and IBB is the loop header because: + // -- If merging predecessors that belong to the same loop as IBB, the + // common tail of merged predecessors may become the loop top if block + // placement is called again and the predecessors may branch to this common + // tail and require more branches. This can be relaxed if + // MachineBlockPlacement::findBestLoopTop is more flexible. + // -- If merging predecessors that do not belong to the same loop as IBB, the + // loop info of IBB's loop and the other loops may be affected. Calling the + // block placement again may make a big change to the layout and eliminate the + // reason to do tail merging here. + if (AfterBlockPlacement && MLI) { + ML = MLI->getLoopFor(IBB); + if (ML && IBB == ML->getHeader()) + continue; + } + for (MachineBasicBlock *PBB : I->predecessors()) { if (MergePotentials.size() == TailMergeThreshold) break; @@ -1024,16 +1042,12 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (PBB->hasEHPadSuccessor()) continue; - // Bail out if the loop header (IBB) is not the top of the loop chain - // after the block placement. Otherwise, the common tail of IBB's - // predecessors may become the loop top if block placement is called again - // and the predecessors may branch to this common tail. - // FIXME: Relaxed this check if the algorithm of finding loop top is - // changed in MBP. + // After block placement, only consider predecessors that belong to the + // same loop as IBB. The reason is the same as above when skipping the loop + // header. 
if (AfterBlockPlacement && MLI) - if (MachineLoop *ML = MLI->getLoopFor(IBB)) - if (IBB == ML->getHeader() && ML == MLI->getLoopFor(PBB)) - continue; + if (ML != MLI->getLoopFor(PBB)) + continue; MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; diff --git a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll index 151cc1b12ed..04eae8f9afe 100644 --- a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -49,7 +49,7 @@ tailrecurse.switch: ; preds = %tailrecurse ; V8-NEXT: beq ; V8-NEXT: %tailrecurse.switch ; V8: cmp -; V8-NEXT: beq +; V8-NEXT: bne ; V8-NEXT: b ; The trailing space in the last line checks that the branch is unconditional switch i32 %and, label %sw.epilog [ diff --git a/llvm/test/CodeGen/X86/tail-merge-after-mbp.ll b/llvm/test/CodeGen/X86/tail-merge-after-mbp.ll new file mode 100644 index 00000000000..7a8d2e8000e --- /dev/null +++ b/llvm/test/CodeGen/X86/tail-merge-after-mbp.ll @@ -0,0 +1,92 @@ +; RUN: llc -mtriple=x86_64-linux -o - %s | FileCheck %s + +%0 = type { %1, %3* } +%1 = type { %2* } +%2 = type { %2*, i8* } +%3 = type { i32, i32 (i32, i32)* } + + +declare i32 @Up(...) 
+declare i32 @f(i32, i32) + +; check loop block BB#10 is not merged with LBB0_12 +; check loop block LBB0_9 is not merged with BB#11, BB#13 +define i32 @foo(%0* nocapture readonly, i32, i1 %c, i8* %p1, %2** %p2) { +; CHECK-LABEL: foo: +; CHECK: LBB0_9: +; CHECK-NEXT: movq (%r14), %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: je +; CHECK-NEXT:# BB#10: +; CHECK-NEXT: cmpq $0, 8(%rax) +; CHECK-NEXT: jne +; CHECK-NEXT:# BB#11: +; CHECK-NEXT: movq (%r14), %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: je +; CHECK-NEXT:LBB0_12: +; CHECK-NEXT: cmpq $0, 8(%rax) +; CHECK-NEXT: jne +; CHECK-NEXT:# BB#13: +; CHECK-NEXT: movq (%r14), %rax +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: jne + br i1 %c, label %34, label %3 + +; <label>:3: ; preds = %2 + br i1 %c, label %7, label %4 + +; <label>:4: ; preds = %3 + %5 = tail call i32 @f(i32 undef, i32 undef) + %6 = icmp eq i32 %5, 0 + br i1 %6, label %7, label %34 + +; <label>:7: ; preds = %4, %3 + %8 = icmp eq %2* null, null + br i1 %8, label %34, label %9 + +; <label>:9: ; preds = %7 + %10 = icmp eq i8* %p1, null + br i1 %10, label %11, label %32 + +; <label>:11: ; preds = %9 + %12 = load %2*, %2** %p2, align 8 + %13 = icmp eq %2* %12, null + br i1 %13, label %34, label %14 + +; <label>:14: ; preds = %11 + %15 = getelementptr inbounds %2, %2* %12, i64 0, i32 1 + %16 = load i8*, i8** %15, align 8 + %17 = icmp eq i8* %16, null + br i1 %17, label %18, label %32 + +; <label>:18: ; preds = %14 + %19 = load %2*, %2** %p2, align 8 + %20 = icmp eq %2* %19, null + br i1 %20, label %34, label %21 + +; <label>:21: ; preds = %18 + %22 = getelementptr inbounds %2, %2* %19, i64 0, i32 1 + %23 = load i8*, i8** %22, align 8 + %24 = icmp eq i8* %23, null + br i1 %24, label %25, label %32 + +; <label>:25: ; preds = %28, %21 + %26 = load %2*, %2** %p2, align 8 + %27 = icmp eq %2* %26, null + br i1 %27, label %34, label %28 + +; <label>:28: ; preds = %25 + %29 = getelementptr inbounds %2, %2* %26, i64 0, i32 1 + %30 = load i8*, i8** %29, 
align 8 + %31 = icmp eq i8* %30, null + br i1 %31, label %25, label %32 + +; <label>:32: ; preds = %28, %21, %14, %9 + %33 = tail call i32 (...) @Up() + br label %34 + +; <label>:34: ; preds = %32, %25, %18, %11, %7, %4, %2 + %35 = phi i32 [ 0, %2 ], [ %5, %4 ], [ 0, %7 ], [ 0, %11 ], [ 0, %32 ], [ 0, %18 ], [ 0, %25 ] + ret i32 %35 +} |