diff options
author | Sanjay Patel <spatel@rotateright.com> | 2019-05-04 12:46:32 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2019-05-04 12:46:32 +0000 |
commit | 5ab41a7a0552690e9f7ca657bee1d0507baaddfb (patch) | |
tree | 27306d4c26bb538f05725642b079697fb77cb068 /llvm/lib/CodeGen/CodeGenPrepare.cpp | |
parent | 55dc751ef7ad3df0b7870fa0d3851d5b0225213b (diff) | |
download | bcm5719-llvm-5ab41a7a0552690e9f7ca657bee1d0507baaddfb.tar.gz bcm5719-llvm-5ab41a7a0552690e9f7ca657bee1d0507baaddfb.zip |
[CodeGenPrepare] limit overflow intrinsic matching to a single basic block (2nd try)
This is a subset of the original commit from rL359879
which was reverted because it could crash when using the 'RemovedInstructions'
structure that enables delayed deletion of dead instructions. The motivating
compile-time win does not require that change though. We should get most of
that win from this change alone.
Using/updating a dominator tree to match math overflow patterns may be very
expensive in compile-time (because of the way CGP uses a DT), so just handle
the single-block case.
See post-commit thread for rL354298 for more details:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20190422/646276.html
Differential Revision: https://reviews.llvm.org/D61075
llvm-svn: 359969
Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r-- | llvm/lib/CodeGen/CodeGenPrepare.cpp | 49 |
1 files changed, 21 insertions, 28 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 4335f10117e..45afaf20c59 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -1177,6 +1177,20 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp, Intrinsic::ID IID) { + if (BO->getParent() != Cmp->getParent()) { + // We used to use a dominator tree here to allow multi-block optimization. + // But that was problematic because: + // 1. It could cause a perf regression by hoisting the math op into the + // critical path. + // 2. It could cause a perf regression by creating a value that was live + // across multiple blocks and increasing register pressure. + // 3. Use of a dominator tree could cause large compile-time regression. + // This is because we recompute the DT on every change in the main CGP + // run-loop. The recomputing is probably unnecessary in many cases, so if + // that was fixed, using a DT here would be ok. + return false; + } + // We allow matching the canonical IR (add X, C) back to (usubo X, -C). Value *Arg0 = BO->getOperand(0); Value *Arg1 = BO->getOperand(1); @@ -1186,36 +1200,15 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1)); } - Instruction *InsertPt; - if (BO->hasOneUse() && BO->user_back() == Cmp) { - // If the math is only used by the compare, insert at the compare to keep - // the condition in the same block as its users. (CGP aggressively sinks - // compares to help out SDAG.) - InsertPt = Cmp; - } else { - // The math and compare may be independent instructions. Check dominance to - // determine the insertion point for the intrinsic. - bool MathDominates = getDT(*BO->getFunction()).dominates(BO, Cmp); - if (!MathDominates && !getDT(*BO->getFunction()).dominates(Cmp, BO)) - return false; - - BasicBlock *MathBB = BO->getParent(), *CmpBB = Cmp->getParent(); - if (MathBB != CmpBB) { - // Avoid hoisting an extra op into a dominating block and creating a - // potentially longer critical path. - if (!MathDominates) - return false; - // Check that the insertion doesn't create a value that is live across - // more than two blocks, so to minimise the increase in register pressure. - BasicBlock *Dominator = MathDominates ? MathBB : CmpBB; - BasicBlock *Dominated = MathDominates ? CmpBB : MathBB; - auto Successors = successors(Dominator); - if (llvm::find(Successors, Dominated) == Successors.end()) - return false; + // Insert at the first instruction of the pair. + Instruction *InsertPt = nullptr; + for (Instruction &Iter : *Cmp->getParent()) { + if (&Iter == BO || &Iter == Cmp) { + InsertPt = &Iter; + break; } - - InsertPt = MathDominates ? cast<Instruction>(BO) : cast<Instruction>(Cmp); } + assert(InsertPt != nullptr && "Parent block did not contain cmp or binop"); IRBuilder<> Builder(InsertPt); Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1); |