diff options
author | Vedant Kumar <vsk@apple.com> | 2019-10-08 17:17:51 +0000 |
---|---|---|
committer | Vedant Kumar <vsk@apple.com> | 2019-10-08 17:17:51 +0000 |
commit | 9852699dcb18dd26866695a861e31a07bcc16e82 (patch) | |
tree | fd4fcadee38bbfaf30f60fa0d7f816fe9b4f413d /llvm/lib/Transforms/IPO | |
parent | d8245e7a36d50310c85cbefe6b79f27f745e7cee (diff) | |
download | bcm5719-llvm-9852699dcb18dd26866695a861e31a07bcc16e82.tar.gz bcm5719-llvm-9852699dcb18dd26866695a861e31a07bcc16e82.zip |
[CodeExtractor] Factor out and reuse shrinkwrap analysis
Factor out CodeExtractor's analysis of allocas (for shrinkwrapping
purposes), and allow the analysis to be reused.
This resolves a quadratic compile-time bug observed when compiling
AMDGPUDisassembler.cpp.o.
Pre-patch (Release + LTO clang):
```
---User Time--- --System Time-- --User+System-- ---Wall Time--- --- Name ---
176.5278 ( 57.8%) 0.4915 ( 18.5%) 177.0192 ( 57.4%) 177.4112 ( 57.3%) Hot Cold Splitting
```
Post-patch (ReleaseAsserts clang):
```
---User Time--- --System Time-- --User+System-- ---Wall Time--- --- Name ---
1.4051 ( 3.3%) 0.0079 ( 0.3%) 1.4129 ( 3.2%) 1.4129 ( 3.2%) Hot Cold Splitting
```
Testing: check-llvm, and comparing the AMDGPUDisassembler.cpp.o binary
pre- vs. post-patch.
An alternate approach is to hide CodeExtractorAnalysisCache from clients
of CodeExtractor, and to recompute the analysis from scratch inside of
CodeExtractor::extractCodeRegion(). This eliminates some redundant work
in the shrinkwrapping legality check. However, some clients continue to
exhibit O(n^2) compile time behavior as computing the analysis is O(n).
rdar://55912966
Differential Revision: https://reviews.llvm.org/D68616
llvm-svn: 374089
Diffstat (limited to 'llvm/lib/Transforms/IPO')
-rw-r--r-- | llvm/lib/Transforms/IPO/BlockExtractor.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/HotColdSplitting.cpp | 26 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/LoopExtractor.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/PartialInlining.cpp | 8 |
4 files changed, 26 insertions, 17 deletions
diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp index 56854fceac0..de80c88c159 100644 --- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp +++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp @@ -206,7 +206,8 @@ bool BlockExtractor::runOnModule(Module &M) { ++NumExtracted; Changed = true; } - Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(); + CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); + Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC); if (F) LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() << "' in: " << F->getName() << '\n'); diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index bd641da37f5..cfdcc8db7f5 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -290,13 +290,10 @@ static int getOutliningPenalty(ArrayRef<BasicBlock *> Region, return Penalty; } -Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, - DominatorTree &DT, - BlockFrequencyInfo *BFI, - TargetTransformInfo &TTI, - OptimizationRemarkEmitter &ORE, - AssumptionCache *AC, - unsigned Count) { +Function *HotColdSplitting::extractColdRegion( + const BlockSequence &Region, const CodeExtractorAnalysisCache &CEAC, + DominatorTree &DT, BlockFrequencyInfo *BFI, TargetTransformInfo &TTI, + OptimizationRemarkEmitter &ORE, AssumptionCache *AC, unsigned Count) { assert(!Region.empty()); // TODO: Pass BFI and BPI to update profile information. @@ -318,7 +315,7 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region, return nullptr; Function *OrigF = Region[0]->getParent(); - if (Function *OutF = CE.extractCodeRegion()) { + if (Function *OutF = CE.extractCodeRegion(CEAC)) { User *U = *OutF->user_begin(); CallInst *CI = cast<CallInst>(U); CallSite CS(CI); @@ -606,9 +603,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { } } + if (OutliningWorklist.empty()) + return Changed; + // Outline single-entry cold regions, splitting up larger regions as needed. unsigned OutlinedFunctionID = 1; - while (!OutliningWorklist.empty()) { + // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time. + CodeExtractorAnalysisCache CEAC(F); + do { OutliningRegion Region = OutliningWorklist.pop_back_val(); assert(!Region.empty() && "Empty outlining region in worklist"); do { @@ -619,14 +621,14 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { BB->dump(); }); - Function *Outlined = extractColdRegion(SubRegion, *DT, BFI, TTI, ORE, AC, - OutlinedFunctionID); + Function *Outlined = extractColdRegion(SubRegion, CEAC, *DT, BFI, TTI, + ORE, AC, OutlinedFunctionID); if (Outlined) { ++OutlinedFunctionID; Changed = true; } } while (!Region.empty()); - } + } while (!OutliningWorklist.empty()); return Changed; } diff --git a/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/llvm/lib/Transforms/IPO/LoopExtractor.cpp index 91c7b5f5f13..add2ae05373 100644 --- a/llvm/lib/Transforms/IPO/LoopExtractor.cpp +++ b/llvm/lib/Transforms/IPO/LoopExtractor.cpp @@ -141,10 +141,12 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { if (NumLoops == 0) return Changed; --NumLoops; AssumptionCache *AC = nullptr; + Function &Func = *L->getHeader()->getParent(); if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>()) - AC = ACT->lookupAssumptionCache(*L->getHeader()->getParent()); + AC = ACT->lookupAssumptionCache(Func); + CodeExtractorAnalysisCache CEAC(Func); CodeExtractor Extractor(DT, *L, false, nullptr, nullptr, AC); - if (Extractor.extractCodeRegion() != nullptr) { + if (Extractor.extractCodeRegion(CEAC) != nullptr) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index a0f0b6726cc..e193074884a 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -1122,6 +1122,9 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { BranchProbabilityInfo BPI(*ClonedFunc, LI); ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI)); + // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time. + CodeExtractorAnalysisCache CEAC(*ClonedFunc); + SetVector<Value *> Inputs, Outputs, Sinks; for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo : ClonedOMRI->ORI) { @@ -1148,7 +1151,7 @@ bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() { if (Outputs.size() > 0 && !ForceLiveExit) continue; - Function *OutlinedFunc = CE.extractCodeRegion(); + Function *OutlinedFunc = CE.extractCodeRegion(CEAC); if (OutlinedFunc) { CallSite OCS = PartialInlinerImpl::getOneCallSiteTo(OutlinedFunc); @@ -1210,11 +1213,12 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() { } // Extract the body of the if. + CodeExtractorAnalysisCache CEAC(*ClonedFunc); Function *OutlinedFunc = CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc), /* AllowVarargs */ true) - .extractCodeRegion(); + .extractCodeRegion(CEAC); if (OutlinedFunc) { BasicBlock *OutliningCallBB = |