summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/BranchFolding.cpp66
-rw-r--r--llvm/lib/CodeGen/BranchFolding.h12
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp39
-rw-r--r--llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll63
-rw-r--r--llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll2
5 files changed, 156 insertions, 26 deletions
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 1094f52efbb..63ffc1fbb8f 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -137,6 +138,8 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
// Remove the block.
MF->erase(MBB);
FuncletMembership.erase(MBB);
+ if (MLI)
+ MLI->removeBlock(MBB);
}
/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
@@ -193,18 +196,22 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
}
/// OptimizeFunction - Perhaps branch folding, tail merging and other
-/// CFG optimizations on the given function.
+/// CFG optimizations on the given function. Block placement changes the layout
+/// and may create new tail merging opportunities.
bool BranchFolder::OptimizeFunction(MachineFunction &MF,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
- MachineModuleInfo *mmi) {
+ MachineModuleInfo *mmi,
+ MachineLoopInfo *mli, bool AfterPlacement) {
if (!tii) return false;
TriedMerging.clear();
+ AfterBlockPlacement = AfterPlacement;
TII = tii;
TRI = tri;
MMI = mmi;
+ MLI = mli;
RS = nullptr;
// Use a RegScavenger to help update liveness when required.
@@ -230,7 +237,10 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
bool MadeChangeThisIteration = true;
while (MadeChangeThisIteration) {
MadeChangeThisIteration = TailMergeBlocks(MF);
- MadeChangeThisIteration |= OptimizeBranches(MF);
+ // No need to clean up if tail merging does not change anything after the
+ // block placement.
+ if (!AfterBlockPlacement || MadeChangeThisIteration)
+ MadeChangeThisIteration |= OptimizeBranches(MF);
if (EnableHoistCommonCode)
MadeChangeThisIteration |= HoistCommonCode(MF);
MadeChange |= MadeChangeThisIteration;
@@ -447,6 +457,11 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// Splice the code over.
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
+ // NewMBB belongs to the same loop as CurMBB.
+ if (MLI)
+ if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))
+ ML->addBasicBlockToLoop(NewMBB, MLI->getBase());
+
// NewMBB inherits CurMBB's block frequency.
MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
@@ -934,23 +949,27 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (!EnableTailMerge) return MadeChange;
// First find blocks with no successors.
- MergePotentials.clear();
- for (MachineBasicBlock &MBB : MF) {
- if (MergePotentials.size() == TailMergeThreshold)
- break;
- if (!TriedMerging.count(&MBB) && MBB.succ_empty())
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
- }
+ // Block placement does not create new tail merging opportunities for these
+ // blocks.
+ if (!AfterBlockPlacement) {
+ MergePotentials.clear();
+ for (MachineBasicBlock &MBB : MF) {
+ if (MergePotentials.size() == TailMergeThreshold)
+ break;
+ if (!TriedMerging.count(&MBB) && MBB.succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
+ }
- // If this is a large problem, avoid visiting the same basic blocks
- // multiple times.
- if (MergePotentials.size() == TailMergeThreshold)
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
- TriedMerging.insert(MergePotentials[i].getBlock());
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
- // See if we can do any tail merging on those.
- if (MergePotentials.size() >= 2)
- MadeChange |= TryTailMergeBlocks(nullptr, nullptr);
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(nullptr, nullptr);
+ }
// Look at blocks (IBB) with multiple predecessors (PBB).
// We change each predecessor to a canonical form, by
@@ -997,6 +1016,17 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (PBB->hasEHPadSuccessor())
continue;
+ // Bail out if the loop header (IBB) is not the top of the loop chain
+ // after the block placement. Otherwise, the common tail of IBB's
+ // predecessors may become the loop top if block placement is called again
+ // and the predecessors may branch to this common tail.
+ // FIXME: Relaxed this check if the algorithm of finding loop top is
+ // changed in MBP.
+ if (AfterBlockPlacement && MLI)
+ if (MachineLoop *ML = MLI->getLoopFor(IBB))
+ if (IBB == ML->getHeader() && ML == MLI->getLoopFor(PBB))
+ continue;
+
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h
index 156641e351b..f7040990f13 100644
--- a/llvm/lib/CodeGen/BranchFolding.h
+++ b/llvm/lib/CodeGen/BranchFolding.h
@@ -20,6 +20,7 @@ namespace llvm {
class MachineBranchProbabilityInfo;
class MachineFunction;
class MachineModuleInfo;
+ class MachineLoopInfo;
class RegScavenger;
class TargetInstrInfo;
class TargetRegisterInfo;
@@ -32,10 +33,11 @@ namespace llvm {
MBFIWrapper &MBFI,
const MachineBranchProbabilityInfo &MBPI);
- bool OptimizeFunction(MachineFunction &MF,
- const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri,
- MachineModuleInfo *mmi);
+ bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri, MachineModuleInfo *mmi,
+ MachineLoopInfo *mli = nullptr,
+ bool AfterPlacement = false);
+
private:
class MergePotentialsElt {
unsigned Hash;
@@ -93,11 +95,13 @@ namespace llvm {
};
std::vector<SameTailElt> SameTails;
+ bool AfterBlockPlacement;
bool EnableTailMerge;
bool EnableHoistCommonCode;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineModuleInfo *MMI;
+ MachineLoopInfo *MLI;
RegScavenger *RS;
public:
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index c562af9d964..42bad4c7301 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -26,6 +26,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "BranchFolding.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -116,6 +118,12 @@ static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
cl::desc("Cost of jump instructions."),
cl::init(1), cl::Hidden);
+static cl::opt<bool>
+BranchFoldPlacement("branch-fold-placement",
+ cl::desc("Perform branch folding during placement. "
+ "Reduces code size."),
+ cl::init(true), cl::Hidden);
+
extern cl::opt<unsigned> StaticLikelyProb;
namespace {
@@ -232,10 +240,10 @@ class MachineBlockPlacement : public MachineFunctionPass {
const MachineBranchProbabilityInfo *MBPI;
/// \brief A handle to the function-wide block frequency pass.
- const MachineBlockFrequencyInfo *MBFI;
+ std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
/// \brief A handle to the loop info.
- const MachineLoopInfo *MLI;
+ MachineLoopInfo *MLI;
/// \brief A handle to the target's instruction info.
const TargetInstrInfo *TII;
@@ -323,6 +331,7 @@ public:
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -1462,7 +1471,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
return false;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>(
+ getAnalysis<MachineBlockFrequencyInfo>());
MLI = &getAnalysis<MachineLoopInfo>();
TII = F.getSubtarget().getInstrInfo();
TLI = F.getSubtarget().getTargetLowering();
@@ -1470,6 +1480,29 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
assert(BlockToChain.empty());
buildCFGChains(F);
+
+ // Changing the layout can create new tail merging opportunities.
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ // TailMerge can create jump into if branches that make CFG irreducible for
+ // HW that requires structurized CFG.
+ bool EnableTailMerge = !F.getTarget().requiresStructuredCFG() &&
+ PassConfig->getEnableTailMerge() &&
+ BranchFoldPlacement;
+ // No tail merging opportunities if the block number is less than four.
+ if (F.size() > 3 && EnableTailMerge) {
+ BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
+ *MBPI);
+
+ if (BF.OptimizeFunction(F, TII, F.getSubtarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
+ /*AfterBlockPlacement=*/true)) {
+ // Redo the layout if tail merging creates/removes/moves blocks.
+ BlockToChain.clear();
+ ChainAllocator.DestroyAll();
+ buildCFGChains(F);
+ }
+ }
+
optimizeBranches(F);
alignBlocks(F);
diff --git a/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll b/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
new file mode 100644
index 00000000000..f7b9f9d4c23
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/tailmerging_in_mbp.ll
@@ -0,0 +1,63 @@
+; RUN: llc <%s -march=aarch64 | FileCheck %s
+
+; CHECK-LABEL: test:
+; CHECK: .LBB0_7
+; CHECK: b.hi .LBB0_2
+; CHECK-NEXT: b .LBB0_9
+; CHECK-NEXT: .LBB0_8
+; CHECK-NEXT: mov x8, x9
+; CHECK-NEXT: .LBB0_9
+define i64 @test(i64 %n, i64* %a, i64* %b, i64* %c, i64* %d, i64* %e, i64* %f) {
+entry:
+ %cmp28 = icmp sgt i64 %n, 1
+ br i1 %cmp28, label %for.body, label %for.end
+
+for.body: ; preds = %for.body.lr.ph, %if.end
+ %j = phi i64 [ %n, %entry ], [ %div, %if.end ]
+ %div = lshr i64 %j, 1
+ %a.arrayidx = getelementptr inbounds i64, i64* %a, i64 %div
+ %a.j = load i64, i64* %a.arrayidx
+ %b.arrayidx = getelementptr inbounds i64, i64* %b, i64 %div
+ %b.j = load i64, i64* %b.arrayidx
+ %cmp.i = icmp slt i64 %a.j, %b.j
+ br i1 %cmp.i, label %for.end.loopexit, label %cond.false.i
+
+cond.false.i: ; preds = %for.body
+ %cmp4.i = icmp sgt i64 %a.j, %b.j
+ br i1 %cmp4.i, label %if.end, label %cond.false6.i
+
+cond.false6.i: ; preds = %cond.false.i
+ %c.arrayidx = getelementptr inbounds i64, i64* %c, i64 %div
+ %c.j = load i64, i64* %c.arrayidx
+ %d.arrayidx = getelementptr inbounds i64, i64* %d, i64 %div
+ %d.j = load i64, i64* %d.arrayidx
+ %cmp9.i = icmp slt i64 %c.j, %d.j
+ br i1 %cmp9.i, label %for.end.loopexit, label %cond.false11.i
+
+cond.false11.i: ; preds = %cond.false6.i
+ %cmp14.i = icmp sgt i64 %c.j, %d.j
+ br i1 %cmp14.i, label %if.end, label %cond.false12.i
+
+cond.false12.i: ; preds = %cond.false11.i
+ %e.arrayidx = getelementptr inbounds i64, i64* %e, i64 %div
+ %e.j = load i64, i64* %e.arrayidx
+ %f.arrayidx = getelementptr inbounds i64, i64* %f, i64 %div
+ %f.j = load i64, i64* %f.arrayidx
+ %cmp19.i = icmp sgt i64 %e.j, %f.j
+ br i1 %cmp19.i, label %if.end, label %for.end.loopexit
+
+if.end: ; preds = %cond.false12.i, %cond.false11.i, %cond.false.i
+ %cmp = icmp ugt i64 %j, 3
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %cond.false12.i, %cond.false6.i, %for.body, %if.end
+ %j.0.lcssa.ph = phi i64 [ %j, %cond.false12.i ], [ %j, %cond.false6.i ], [ %j, %for.body ], [ %div, %if.end ]
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %j.0.lcssa = phi i64 [ %n, %entry ], [ %j.0.lcssa.ph, %for.end.loopexit ]
+ %j.2 = add i64 %j.0.lcssa, %n
+ %j.3 = mul i64 %j.2, %n
+ %j.4 = add i64 %j.3, 10
+ ret i64 %j.4
+}
diff --git a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 04eae8f9afe..151cc1b12ed 100644
--- a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -49,7 +49,7 @@ tailrecurse.switch: ; preds = %tailrecurse
; V8-NEXT: beq
; V8-NEXT: %tailrecurse.switch
; V8: cmp
-; V8-NEXT: bne
+; V8-NEXT: beq
; V8-NEXT: b
; The trailing space in the last line checks that the branch is unconditional
switch i32 %and, label %sw.epilog [
OpenPOWER on IntegriCloud