summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Scalar
diff options
context:
space:
mode:
authorChandler Carruth <chandlerc@gmail.com>2018-06-25 23:32:54 +0000
committerChandler Carruth <chandlerc@gmail.com>2018-06-25 23:32:54 +0000
commit1652996fd619afd41a3b01441298f51b5f0128a1 (patch)
treec9a905d1e85e023cbcd2ec77551a21202dcfc658 /llvm/lib/Transforms/Scalar
parent32447ff5b9592064d94a2fad5ffd71c5fcb35181 (diff)
downloadbcm5719-llvm-1652996fd619afd41a3b01441298f51b5f0128a1.tar.gz
bcm5719-llvm-1652996fd619afd41a3b01441298f51b5f0128a1.zip
[PM/LoopUnswitch] Teach the new unswitch to handle nontrivial
unswitching of switches. This works much like trivial unswitching of switches in that it reliably moves the switch out of the loop. Here we potentially clone the entire loop into each successor of the switch and re-point the cases at these clones. Due to the complexity of actually doing nontrivial unswitching, this patch doesn't create a dedicated routine for handling switches -- it would duplicate far too much code. Instead, it generalizes the existing routine to handle both branches and switches as it largely reduces to looping in a few places instead of doing something once. This actually improves the results in some cases with branches due to being much more careful about how dead regions of code are managed. With branches, because exactly one clone is created and there are exactly two edges considered, somewhat sloppy handling of the dead regions of code was sufficient in most cases. But with switches, there are much more complicated patterns of dead code and so I've had to move to a more robust model generally. We still do as much pruning of the dead code early as possible because that allows us to avoid even cloning the code. This also surfaced another problem with nontrivial unswitching before which is that we weren't as precise in reconstructing loops as we could have been. This seems to have been mostly harmless, but resulted in pointless LCSSA PHI nodes and other unnecessary cruft. With switches, we have to get this *right*, and everything benefits from it. While the testing may seem a bit light here because we only have two real cases with actual switches, they do a surprisingly good job of exercising numerous edge cases. Also, because we share the logic with branches, most of the changes in this patch are reasonably well covered by existing tests. The new unswitch now has all of the same fundamental power as the old one with the exception of the single unsound case of *partial* switch unswitching -- that really is just loop specialization and not unswitching at all. It doesn't fit into the canonicalization model in any way. We can add a loop specialization pass that runs late based on profile data if important test cases ever come up here. Differential Revision: https://reviews.llvm.org/D47683 llvm-svn: 335553
Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp357
1 files changed, 201 insertions, 156 deletions
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 73e251ff654..5a84d74683a 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -715,8 +715,12 @@ static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
///
/// This routine handles cloning all of the necessary loop blocks and exit
/// blocks including rewriting their instructions and the relevant PHI nodes.
-/// It skips loop and exit blocks that are not necessary based on the provided
-/// set. It also correctly creates the unconditional branch in the cloned
+/// Any loop blocks or exit blocks which are dominated by a different successor
+/// than the one for this clone of the loop blocks can be trivially skipped. We
+/// use the `DominatingSucc` map to determine whether a block satisfies that
+/// property with a simple map lookup.
+///
+/// It also correctly creates the unconditional branch in the cloned
/// unswitched parent block to only point at the unswitched successor.
///
/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
@@ -730,7 +734,7 @@ static BasicBlock *buildClonedLoopBlocks(
Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
- const SmallPtrSetImpl<BasicBlock *> &SkippedLoopAndExitBlocks,
+ const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
ValueToValueMapTy &VMap,
SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
DominatorTree &DT, LoopInfo &LI) {
@@ -751,19 +755,26 @@ static BasicBlock *buildClonedLoopBlocks(
return NewBB;
};
+ // We skip cloning blocks when they have a dominating succ that is not the
+ // succ we are cloning for.
+ auto SkipBlock = [&](BasicBlock *BB) {
+ auto It = DominatingSucc.find(BB);
+ return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
+ };
+
// First, clone the preheader.
auto *ClonedPH = CloneBlock(LoopPH);
// Then clone all the loop blocks, skipping the ones that aren't necessary.
for (auto *LoopBB : L.blocks())
- if (!SkippedLoopAndExitBlocks.count(LoopBB))
+ if (!SkipBlock(LoopBB))
CloneBlock(LoopBB);
// Split all the loop exit edges so that when we clone the exit blocks, if
// any of the exit blocks are *also* a preheader for some other loop, we
// don't create multiple predecessors entering the loop header.
for (auto *ExitBB : ExitBlocks) {
- if (SkippedLoopAndExitBlocks.count(ExitBB))
+ if (SkipBlock(ExitBB))
continue;
// When we are going to clone an exit, we don't need to clone all the
@@ -841,7 +852,7 @@ static BasicBlock *buildClonedLoopBlocks(
// Update any PHI nodes in the cloned successors of the skipped blocks to not
// have spurious incoming values.
for (auto *LoopBB : L.blocks())
- if (SkippedLoopAndExitBlocks.count(LoopBB))
+ if (SkipBlock(LoopBB))
for (auto *SuccBB : successors(LoopBB))
if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
for (PHINode &PN : ClonedSuccBB->phis())
@@ -1175,10 +1186,41 @@ static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
}
static void
+deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
+ ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
+ DominatorTree &DT) {
+ // Find all the dead clones, and remove them from their successors.
+ SmallVector<BasicBlock *, 16> DeadBlocks;
+ for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
+ for (auto &VMap : VMaps)
+ if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
+ if (!DT.isReachableFromEntry(ClonedBB)) {
+ for (BasicBlock *SuccBB : successors(ClonedBB))
+ SuccBB->removePredecessor(ClonedBB);
+ DeadBlocks.push_back(ClonedBB);
+ }
+
+ // Drop any remaining references to break cycles.
+ for (BasicBlock *BB : DeadBlocks)
+ BB->dropAllReferences();
+ // Erase them from the IR.
+ for (BasicBlock *BB : DeadBlocks)
+ BB->eraseFromParent();
+}
+
+static void
deleteDeadBlocksFromLoop(Loop &L,
- const SmallVectorImpl<BasicBlock *> &DeadBlocks,
SmallVectorImpl<BasicBlock *> &ExitBlocks,
DominatorTree &DT, LoopInfo &LI) {
+ // Find all the dead blocks, and remove them from their successors.
+ SmallVector<BasicBlock *, 16> DeadBlocks;
+ for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
+ if (!DT.isReachableFromEntry(BB)) {
+ for (BasicBlock *SuccBB : successors(BB))
+ SuccBB->removePredecessor(BB);
+ DeadBlocks.push_back(BB);
+ }
+
SmallPtrSet<BasicBlock *, 16> DeadBlockSet(DeadBlocks.begin(),
DeadBlocks.end());
@@ -1187,11 +1229,6 @@ deleteDeadBlocksFromLoop(Loop &L,
llvm::erase_if(ExitBlocks,
[&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
- // Remove these blocks from their successors.
- for (auto *BB : DeadBlocks)
- for (BasicBlock *SuccBB : successors(BB))
- SuccBB->removePredecessor(BB, /*DontDeleteUselessPHIs*/ true);
-
// Walk from this loop up through its parents removing all of the dead blocks.
for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
for (auto *BB : DeadBlocks)
@@ -1582,31 +1619,24 @@ void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
} while (!DomWorklist.empty());
}
-/// Take an invariant branch that has been determined to be safe and worthwhile
-/// to unswitch despite being non-trivial to do so and perform the unswitch.
-///
-/// This directly updates the CFG to hoist the predicate out of the loop, and
-/// clone the necessary parts of the loop to maintain behavior.
-///
-/// It also updates both dominator tree and loopinfo based on the unswitching.
-///
-/// Once unswitching has been performed it runs the provided callback to report
-/// the new loops and no-longer valid loops to the caller.
-static bool unswitchInvariantBranch(
- Loop &L, BranchInst &BI, ArrayRef<Value *> Invariants, DominatorTree &DT,
- LoopInfo &LI, AssumptionCache &AC,
+static bool unswitchNontrivialInvariants(
+ Loop &L, TerminatorInst &TI, ArrayRef<Value *> Invariants,
+ DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
- auto *ParentBB = BI.getParent();
-
- // We can only unswitch conditional branches with an invariant condition or
- // combining invariant conditions with an instruction.
- assert(BI.isConditional() && "Can only unswitch a conditional branch!");
- bool FullUnswitch = BI.getCondition() == Invariants[0];
+ auto *ParentBB = TI.getParent();
+ BranchInst *BI = dyn_cast<BranchInst>(&TI);
+ SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
+
+ // We can only unswitch switches, conditional branches with an invariant
+ // condition, or combining invariant conditions with an instruction.
+ assert((SI || BI->isConditional()) &&
+ "Can only unswitch switches and conditional branch!");
+ bool FullUnswitch = SI || BI->getCondition() == Invariants[0];
if (FullUnswitch)
assert(Invariants.size() == 1 &&
"Cannot have other invariants with full unswitching!");
else
- assert(isa<Instruction>(BI.getCondition()) &&
+ assert(isa<Instruction>(BI->getCondition()) &&
"Partial unswitching requires an instruction as the condition!");
// Constant and BBs tracking the cloned and continuing successor. When we are
@@ -1618,18 +1648,27 @@ static bool unswitchInvariantBranch(
bool Direction = true;
int ClonedSucc = 0;
if (!FullUnswitch) {
- if (cast<Instruction>(BI.getCondition())->getOpcode() != Instruction::Or) {
- assert(cast<Instruction>(BI.getCondition())->getOpcode() == Instruction::And &&
- "Only `or` and `and` instructions can combine invariants being unswitched.");
+ if (cast<Instruction>(BI->getCondition())->getOpcode() != Instruction::Or) {
+ assert(cast<Instruction>(BI->getCondition())->getOpcode() ==
+ Instruction::And &&
+ "Only `or` and `and` instructions can combine invariants being "
+ "unswitched.");
Direction = false;
ClonedSucc = 1;
}
}
- auto *UnswitchedSuccBB = BI.getSuccessor(ClonedSucc);
- auto *ContinueSuccBB = BI.getSuccessor(1 - ClonedSucc);
- assert(UnswitchedSuccBB != ContinueSuccBB &&
- "Should not unswitch a branch that always goes to the same place!");
+ BasicBlock *RetainedSuccBB =
+ BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
+ SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
+ if (BI)
+ UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
+ else
+ for (auto Case : SI->cases())
+ UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
+
+ assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
+ "Should not unswitch the same successor we are retaining!");
// The branch should be in this exact loop. Any inner loop's invariant branch
// should be handled by unswitching that inner loop. The caller of this
@@ -1648,9 +1687,6 @@ static bool unswitchInvariantBranch(
if (isa<CleanupPadInst>(ExitBB->getFirstNonPHI()))
return false;
- SmallPtrSet<BasicBlock *, 4> ExitBlockSet(ExitBlocks.begin(),
- ExitBlocks.end());
-
// Compute the parent loop now before we start hacking on things.
Loop *ParentL = L.getParentLoop();
@@ -1669,30 +1705,22 @@ static bool unswitchInvariantBranch(
OuterExitL = NewOuterExitL;
}
- // If the edge we *aren't* cloning in the unswitch (the continuing edge)
- // dominates its target, we can skip cloning the dominated region of the loop
- // and its exits. We compute this as a set of nodes to be skipped.
- SmallPtrSet<BasicBlock *, 4> SkippedLoopAndExitBlocks;
- if (ContinueSuccBB->getUniquePredecessor() ||
- llvm::all_of(predecessors(ContinueSuccBB), [&](BasicBlock *PredBB) {
- return PredBB == ParentBB || DT.dominates(ContinueSuccBB, PredBB);
- })) {
- visitDomSubTree(DT, ContinueSuccBB, [&](BasicBlock *BB) {
- SkippedLoopAndExitBlocks.insert(BB);
- return true;
- });
- }
- // If we are doing full unswitching, then similarly to the above, the edge we
- // *are* cloning in the unswitch (the unswitched edge) dominates its target,
- // we will end up with dead nodes in the original loop and its exits that will
- // need to be deleted. Here, we just retain that the property holds and will
- // compute the deleted set later.
- bool DeleteUnswitchedSucc =
- FullUnswitch &&
- (UnswitchedSuccBB->getUniquePredecessor() ||
- llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) {
- return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB);
- }));
+ // If the edge from this terminator to a successor dominates that successor,
+ // store a map from each block in its dominator subtree to it. This lets us
+ // tell when cloning for a particular successor if a block is dominated by
+ // some *other* successor with a single data structure. We use this to
+ // significantly reduce cloning.
+ SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
+ for (auto *SuccBB : llvm::concat<BasicBlock *const>(
+ makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
+ if (SuccBB->getUniquePredecessor() ||
+ llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
+ return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
+ }))
+ visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
+ DominatingSucc[BB] = SuccBB;
+ return true;
+ });
// Split the preheader, so that we know that there is a safe place to insert
// the conditional branch. We will change the preheader to have a conditional
@@ -1702,84 +1730,93 @@ static bool unswitchInvariantBranch(
BasicBlock *SplitBB = L.getLoopPreheader();
BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI);
- // Keep a mapping for the cloned values.
- ValueToValueMapTy VMap;
-
// Keep track of the dominator tree updates needed.
SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
- // Build the cloned blocks from the loop.
- auto *ClonedPH = buildClonedLoopBlocks(
- L, LoopPH, SplitBB, ExitBlocks, ParentBB, UnswitchedSuccBB,
- ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, DTUpdates, AC, DT, LI);
+ // Clone the loop for each unswitched successor.
+ SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
+ VMaps.reserve(UnswitchedSuccBBs.size());
+ SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
+ for (auto *SuccBB : UnswitchedSuccBBs) {
+ VMaps.emplace_back(new ValueToValueMapTy());
+ ClonedPHs[SuccBB] = buildClonedLoopBlocks(
+ L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
+ DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI);
+ }
// The stitching of the branched code back together depends on whether we're
// doing full unswitching or not with the exception that we always want to
// nuke the initial terminator placed in the split block.
SplitBB->getTerminator()->eraseFromParent();
if (FullUnswitch) {
- // Remove the parent as a predecessor of the
- // unswitched successor.
- UnswitchedSuccBB->removePredecessor(ParentBB,
- /*DontDeleteUselessPHIs*/ true);
- DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
-
- // Now splice the branch from the original loop and use it to select between
- // the two loops.
- SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI);
- BI.setSuccessor(ClonedSucc, ClonedPH);
- BI.setSuccessor(1 - ClonedSucc, LoopPH);
+ for (BasicBlock *SuccBB : UnswitchedSuccBBs) {
+ // Remove the parent as a predecessor of the unswitched successor.
+ SuccBB->removePredecessor(ParentBB,
+ /*DontDeleteUselessPHIs*/ true);
+ DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
+ }
+
+ // Now splice the terminator from the original loop and rewrite its
+ // successors.
+ SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
+ if (BI) {
+ assert(UnswitchedSuccBBs.size() == 1 &&
+ "Only one possible unswitched block for a branch!");
+ BasicBlock *ClonedPH = ClonedPHs.begin()->second;
+ BI->setSuccessor(ClonedSucc, ClonedPH);
+ BI->setSuccessor(1 - ClonedSucc, LoopPH);
+ DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
+ } else {
+ assert(SI && "Must either be a branch or switch!");
+
+ // Walk the cases and directly update their successors.
+ for (auto &Case : SI->cases())
+ Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
+ // We need to use the set to populate domtree updates as even when there
+ // are multiple cases pointing at the same successor we only want to
+ // insert one edge in the domtree.
+ for (BasicBlock *SuccBB : UnswitchedSuccBBs)
+ DTUpdates.push_back(
+ {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
+
+ SI->setDefaultDest(LoopPH);
+ }
// Create a new unconditional branch to the continuing block (as opposed to
// the one cloned).
- BranchInst::Create(ContinueSuccBB, ParentBB);
+ BranchInst::Create(RetainedSuccBB, ParentBB);
} else {
+ assert(BI && "Only branches have partial unswitching.");
+ assert(UnswitchedSuccBBs.size() == 1 &&
+ "Only one possible unswitched block for a branch!");
+ BasicBlock *ClonedPH = ClonedPHs.begin()->second;
// When doing a partial unswitch, we have to do a bit more work to build up
// the branch in the split block.
buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
*ClonedPH, *LoopPH);
+ DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
}
- // Before we update the dominator tree, collect the dead blocks if we're going
- // to end up deleting the unswitched successor.
- SmallVector<BasicBlock *, 16> DeadBlocks;
- if (DeleteUnswitchedSucc) {
- DeadBlocks.push_back(UnswitchedSuccBB);
- for (int i = 0; i < (int)DeadBlocks.size(); ++i) {
- // If we reach an exit block, stop recursing as the unswitched loop will
- // end up reaching the merge block which we make the successor of the
- // exit.
- if (ExitBlockSet.count(DeadBlocks[i]))
- continue;
-
- // Insert the children that are within the loop or exit block set. Other
- // children may reach out of the loop. While we don't expect these to be
- // dead (as the unswitched clone should reach them) we don't try to prove
- // that here.
- for (DomTreeNode *ChildN : *DT[DeadBlocks[i]])
- if (L.contains(ChildN->getBlock()) ||
- ExitBlockSet.count(ChildN->getBlock()))
- DeadBlocks.push_back(ChildN->getBlock());
- }
- }
-
- // Add the remaining edge to our updates and apply them to get an up-to-date
- // dominator tree. Note that this will cause the dead blocks above to be
- // unreachable and no longer in the dominator tree.
- DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
+ // Apply the updates accumulated above to get an up-to-date dominator tree.
DT.applyUpdates(DTUpdates);
+ // Now that we have an accurate dominator tree, first delete the dead cloned
+ // blocks so that we can accurately build any cloned loops. It is important to
+ // not delete the blocks from the original loop yet because we still want to
+ // reference the original loop to understand the cloned loop's structure.
+ deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT);
+
// Build the cloned loop structure itself. This may be substantially
// different from the original structure due to the simplified CFG. This also
// handles inserting all the cloned blocks into the correct loops.
SmallVector<Loop *, 4> NonChildClonedLoops;
- buildClonedLoops(L, ExitBlocks, VMap, LI, NonChildClonedLoops);
-
- // Delete anything that was made dead in the original loop due to
- // unswitching.
- if (!DeadBlocks.empty())
- deleteDeadBlocksFromLoop(L, DeadBlocks, ExitBlocks, DT, LI);
+ for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
+ buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
+ // Now that our cloned loops have been built, we can update the original loop.
+ // First we delete the dead blocks from it and then we rebuild the loop
+ // structure taking these deletions into account.
+ deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI);
SmallVector<Loop *, 4> HoistedLoops;
bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
@@ -1790,31 +1827,37 @@ static bool unswitchInvariantBranch(
// verification steps.
assert(DT.verify(DominatorTree::VerificationLevel::Fast));
- // Now we want to replace all the uses of the invariants within both the
- // original and cloned blocks. We do this here so that we can use the now
- // updated dominator tree to identify which side the users are on.
- ConstantInt *UnswitchedReplacement =
- Direction ? ConstantInt::getTrue(BI.getContext())
- : ConstantInt::getFalse(BI.getContext());
- ConstantInt *ContinueReplacement =
- Direction ? ConstantInt::getFalse(BI.getContext())
- : ConstantInt::getTrue(BI.getContext());
- for (Value *Invariant : Invariants)
- for (auto UI = Invariant->use_begin(), UE = Invariant->use_end();
- UI != UE;) {
- // Grab the use and walk past it so we can clobber it in the use list.
- Use *U = &*UI++;
- Instruction *UserI = dyn_cast<Instruction>(U->getUser());
- if (!UserI)
- continue;
+ if (BI) {
+ // If we unswitched a branch which collapses the condition to a known
+ // constant we want to replace all the uses of the invariants within both
+ // the original and cloned blocks. We do this here so that we can use the
+ // now updated dominator tree to identify which side the users are on.
+ assert(UnswitchedSuccBBs.size() == 1 &&
+ "Only one possible unswitched block for a branch!");
+ BasicBlock *ClonedPH = ClonedPHs.begin()->second;
+ ConstantInt *UnswitchedReplacement =
+ Direction ? ConstantInt::getTrue(BI->getContext())
+ : ConstantInt::getFalse(BI->getContext());
+ ConstantInt *ContinueReplacement =
+ Direction ? ConstantInt::getFalse(BI->getContext())
+ : ConstantInt::getTrue(BI->getContext());
+ for (Value *Invariant : Invariants)
+ for (auto UI = Invariant->use_begin(), UE = Invariant->use_end();
+ UI != UE;) {
+ // Grab the use and walk past it so we can clobber it in the use list.
+ Use *U = &*UI++;
+ Instruction *UserI = dyn_cast<Instruction>(U->getUser());
+ if (!UserI)
+ continue;
- // Replace it with the 'continue' side if in the main loop body, and the
- // unswitched if in the cloned blocks.
- if (DT.dominates(LoopPH, UserI->getParent()))
- U->set(ContinueReplacement);
- else if (DT.dominates(ClonedPH, UserI->getParent()))
- U->set(UnswitchedReplacement);
- }
+ // Replace it with the 'continue' side if in the main loop body, and the
+ // unswitched if in the cloned blocks.
+ if (DT.dominates(LoopPH, UserI->getParent()))
+ U->set(ContinueReplacement);
+ else if (DT.dominates(ClonedPH, UserI->getParent()))
+ U->set(UnswitchedReplacement);
+ }
+ }
// We can change which blocks are exit blocks of all the cloned sibling
// loops, the current loop, and any parent loops which shared exit blocks
@@ -1937,8 +1980,16 @@ static bool unswitchBestCondition(
if (LI.getLoopFor(BB) != &L)
continue;
+ if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ // We can only consider fully loop-invariant switch conditions as we need
+ // to completely eliminate the switch after unswitching.
+ if (!isa<Constant>(SI->getCondition()) &&
+ L.isLoopInvariant(SI->getCondition()))
+ UnswitchCandidates.push_back({SI, {SI->getCondition()}});
+ continue;
+ }
+
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
- // FIXME: Handle switches here!
if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
BI->getSuccessor(0) == BI->getSuccessor(1))
continue;
@@ -2091,9 +2142,9 @@ static bool unswitchBestCondition(
TerminatorInst &TI = *TerminatorAndInvariants.first;
ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
BranchInst *BI = dyn_cast<BranchInst>(&TI);
- int CandidateCost =
- ComputeUnswitchedCost(TI, /*FullUnswitch*/ Invariants.size() == 1 && BI &&
- Invariants[0] == BI->getCondition());
+ int CandidateCost = ComputeUnswitchedCost(
+ TI, /*FullUnswitch*/ !BI || (Invariants.size() == 1 &&
+ Invariants[0] == BI->getCondition()));
LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
<< " for unswitch candidate: " << TI << "\n");
if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
@@ -2109,17 +2160,11 @@ static bool unswitchBestCondition(
return false;
}
- auto *UnswitchBI = dyn_cast<BranchInst>(BestUnswitchTI);
- if (!UnswitchBI) {
- // FIXME: Add support for unswitching a switch here!
- LLVM_DEBUG(dbgs() << "Cannot unswitch anything but a branch!\n");
- return false;
- }
-
LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = "
- << BestUnswitchCost << ") branch: " << *UnswitchBI << "\n");
- return unswitchInvariantBranch(L, *UnswitchBI, BestUnswitchInvariants, DT, LI,
- AC, UnswitchCB);
+ << BestUnswitchCost << ") terminator: " << *BestUnswitchTI
+ << "\n");
+ return unswitchNontrivialInvariants(
+ L, *BestUnswitchTI, BestUnswitchInvariants, DT, LI, AC, UnswitchCB);
}
/// Unswitch control flow predicated on loop invariant conditions.
OpenPOWER on IntegriCloud