summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/CodeGenPrepare.cpp
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2019-05-04 12:46:32 +0000
committerSanjay Patel <spatel@rotateright.com>2019-05-04 12:46:32 +0000
commit5ab41a7a0552690e9f7ca657bee1d0507baaddfb (patch)
tree27306d4c26bb538f05725642b079697fb77cb068 /llvm/lib/CodeGen/CodeGenPrepare.cpp
parent55dc751ef7ad3df0b7870fa0d3851d5b0225213b (diff)
downloadbcm5719-llvm-5ab41a7a0552690e9f7ca657bee1d0507baaddfb.tar.gz
bcm5719-llvm-5ab41a7a0552690e9f7ca657bee1d0507baaddfb.zip
[CodeGenPrepare] limit overflow intrinsic matching to a single basic block (2nd try)
This is a subset of the original commit from rL359879 which was reverted because it could crash when using the 'RemovedInstructions' structure that enables delayed deletion of dead instructions. The motivating compile-time win does not require that change though. We should get most of that win from this change alone. Using/updating a dominator tree to match math overflow patterns may be very expensive in compile-time (because of the way CGP uses a DT), so just handle the single-block case. See post-commit thread for rL354298 for more details: http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20190422/646276.html Differential Revision: https://reviews.llvm.org/D61075 llvm-svn: 359969
Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp49
1 files changed, 21 insertions, 28 deletions
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 4335f10117e..45afaf20c59 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1177,6 +1177,20 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
CmpInst *Cmp,
Intrinsic::ID IID) {
+ if (BO->getParent() != Cmp->getParent()) {
+ // We used to use a dominator tree here to allow multi-block optimization.
+ // But that was problematic because:
+ // 1. It could cause a perf regression by hoisting the math op into the
+ // critical path.
+ // 2. It could cause a perf regression by creating a value that was live
+ // across multiple blocks and increasing register pressure.
+ // 3. Use of a dominator tree could cause large compile-time regression.
+ // This is because we recompute the DT on every change in the main CGP
+ // run-loop. The recomputing is probably unnecessary in many cases, so if
+ // that was fixed, using a DT here would be ok.
+ return false;
+ }
+
// We allow matching the canonical IR (add X, C) back to (usubo X, -C).
Value *Arg0 = BO->getOperand(0);
Value *Arg1 = BO->getOperand(1);
@@ -1186,36 +1200,15 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
}
- Instruction *InsertPt;
- if (BO->hasOneUse() && BO->user_back() == Cmp) {
- // If the math is only used by the compare, insert at the compare to keep
- // the condition in the same block as its users. (CGP aggressively sinks
- // compares to help out SDAG.)
- InsertPt = Cmp;
- } else {
- // The math and compare may be independent instructions. Check dominance to
- // determine the insertion point for the intrinsic.
- bool MathDominates = getDT(*BO->getFunction()).dominates(BO, Cmp);
- if (!MathDominates && !getDT(*BO->getFunction()).dominates(Cmp, BO))
- return false;
-
- BasicBlock *MathBB = BO->getParent(), *CmpBB = Cmp->getParent();
- if (MathBB != CmpBB) {
- // Avoid hoisting an extra op into a dominating block and creating a
- // potentially longer critical path.
- if (!MathDominates)
- return false;
- // Check that the insertion doesn't create a value that is live across
- // more than two blocks, so to minimise the increase in register pressure.
- BasicBlock *Dominator = MathDominates ? MathBB : CmpBB;
- BasicBlock *Dominated = MathDominates ? CmpBB : MathBB;
- auto Successors = successors(Dominator);
- if (llvm::find(Successors, Dominated) == Successors.end())
- return false;
+ // Insert at the first instruction of the pair.
+ Instruction *InsertPt = nullptr;
+ for (Instruction &Iter : *Cmp->getParent()) {
+ if (&Iter == BO || &Iter == Cmp) {
+ InsertPt = &Iter;
+ break;
}
-
- InsertPt = MathDominates ? cast<Instruction>(BO) : cast<Instruction>(Cmp);
}
+ assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
IRBuilder<> Builder(InsertPt);
Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
OpenPOWER on IntegriCloud