diff options
-rw-r--r-- | llvm/lib/Transforms/Scalar/GuardWidening.cpp | 38 | ||||
-rw-r--r-- | llvm/test/Transforms/GuardWidening/loop-schedule.ll | 33 |
2 files changed, 56 insertions, 15 deletions
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp index 92e677db856..55d2f6ee81c 100644 --- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -65,7 +65,7 @@ namespace { class GuardWideningImpl { DominatorTree &DT; - PostDominatorTree &PDT; + PostDominatorTree *PDT; LoopInfo &LI; /// Together, these describe the region of interest. This might be all of @@ -214,7 +214,7 @@ class GuardWideningImpl { public: - explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree &PDT, + explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT, LoopInfo &LI, DomTreeNode *Root, std::function<bool(BasicBlock*)> BlockFilter) : DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter) {} @@ -353,9 +353,8 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( // 2) we have the extra cost of computing the guard condition in the common // case. At the moment, we really only consider the second in our heuristic // here. TODO: evaluate cost model for spurious deopt - bool HoistingOutOfIf = - !PDT.dominates(DominatedGuard->getParent(), DominatingGuard->getParent()); - + // NOTE: As written, this also lets us hoist right over another guard which + // is essentially just another spelling for control flow. if (isWideningCondProfitable(DominatedGuard->getArgOperand(0), DominatingGuard->getArgOperand(0))) return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive; @@ -363,7 +362,26 @@ GuardWideningImpl::WideningScore GuardWideningImpl::computeWideningScore( if (HoistingOutOfLoop) return WS_Positive; - return HoistingOutOfIf ? WS_IllegalOrNegative : WS_Neutral; + // Returns true if we might be hoisting above explicit control flow. Note + // that this completely ignores implicit control flow (guards, calls which + // throw, etc...). That choice appears arbitrary. + auto MaybeHoistingOutOfIf = [&]() { + auto *DominatingBlock = DominatingGuard->getParent(); + auto *DominatedBlock = DominatedGuard->getParent(); + + // Same Block? + if (DominatedBlock == DominatingBlock) + return false; + // Obvious successor (common loop header/preheader case) + if (DominatedBlock == DominatingBlock->getUniqueSuccessor()) + return false; + // TODO: diamond, triangle cases + if (!PDT) return true; + return !PDT->dominates(DominatedGuard->getParent(), + DominatingGuard->getParent()); + }; + + return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral; } bool GuardWideningImpl::isAvailableAt(Value *V, Instruction *Loc, @@ -671,7 +689,7 @@ PreservedAnalyses GuardWideningPass::run(Function &F, auto &DT = AM.getResult<DominatorTreeAnalysis>(F); auto &LI = AM.getResult<LoopAnalysis>(F); auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); - if (!GuardWideningImpl(DT, PDT, LI, DT.getRootNode(), + if (!GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(), [](BasicBlock*) { return true; } ).run()) return PreservedAnalyses::all(); @@ -694,7 +712,7 @@ struct GuardWideningLegacyPass : public FunctionPass { auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); - return GuardWideningImpl(DT, PDT, LI, DT.getRootNode(), + return GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(), [](BasicBlock*) { return true; } ).run(); } @@ -720,7 +738,8 @@ struct LoopGuardWideningLegacyPass : public LoopPass { return false; auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); + auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>(); + auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; BasicBlock *RootBB = L->getLoopPredecessor(); if (!RootBB) RootBB = L->getHeader(); @@ -734,7 +753,6 @@ struct LoopGuardWideningLegacyPass : public LoopPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); getLoopAnalysisUsage(AU); - AU.addRequired<PostDominatorTreeWrapperPass>(); AU.addPreserved<PostDominatorTreeWrapperPass>(); } }; diff --git a/llvm/test/Transforms/GuardWidening/loop-schedule.ll b/llvm/test/Transforms/GuardWidening/loop-schedule.ll index 163fb521d47..94d1c863eac 100644 --- a/llvm/test/Transforms/GuardWidening/loop-schedule.ll +++ b/llvm/test/Transforms/GuardWidening/loop-schedule.ll @@ -3,13 +3,8 @@ ; Main point of this test is to check the scheduling -- there should be ; no analysis passes needed between LICM and LoopGuardWidening -; TODO: Because guard widdening currently requires post-dom, we end up -; breaking the loop pass manager to compute it. Need to either make all -; loop passes preserve postdom (hard) or make it optional in guard widdening ; CHECK: Loop Pass Manager ; CHECK: Loop Invariant Code Motion -; CHECK: Post-Dominator Tree Construction -; CHECK: Loop Pass Manager ; CHECK: Widen guards (within a single loop, as a loop pass) ; CHECK: Loop Invariant Code Motion @@ -21,6 +16,7 @@ define void @iter(i32 %a, i32 %b, i1* %c_p) { ; CHECK: %cond_1 = icmp ult i32 %b, 10 ; CHECK: %wide.chk = and i1 %cond_0, %cond_1 ; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ] +; CHECK-LABEL: loop: entry: %cond_0 = icmp ult i32 %a, 10 @@ -39,3 +35,30 @@ leave.loopexit: ; preds = %loop leave: ; preds = %leave.loopexit, %entry ret void } + +define void @within_loop(i32 %a, i32 %b, i1* %c_p) { +; CHECK-LABEL @within_loop +; CHECK: %cond_0 = icmp ult i32 %a, 10 +; CHECK: %cond_1 = icmp ult i32 %b, 10 +; CHECK: %wide.chk = and i1 %cond_0, %cond_1 +; CHECK-LABEL: loop: +; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ] + +entry: + br label %loop + +loop: ; preds = %loop.preheader, %loop + %cond_0 = icmp ult i32 %a, 10 + call void (i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ] + %cond_1 = icmp ult i32 %b, 10 + call void (i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ] + %cnd = load i1, i1* %c_p + br i1 %cnd, label %loop, label %leave.loopexit + +leave.loopexit: ; preds = %loop + br label %leave + +leave: ; preds = %leave.loopexit, %entry + ret void +} + |