diff options
| author | Wenlei He <aktoon@gmail.com> | 2019-11-24 23:54:07 -0800 |
|---|---|---|
| committer | Wenlei He <aktoon@gmail.com> | 2019-12-05 16:07:01 -0800 |
| commit | 532196d811ad4db1e522012c9d20e4a95aae2eb3 (patch) | |
| tree | d0672afbf979b0ce81e899a3f805a282c24e548f /llvm/lib | |
| parent | e503fd85d3ac9d3e1493a7a63bc43c6939e132cc (diff) | |
| download | bcm5719-llvm-532196d811ad4db1e522012c9d20e4a95aae2eb3.tar.gz bcm5719-llvm-532196d811ad4db1e522012c9d20e4a95aae2eb3.zip | |
[AutoFDO] Top-down Inlining for specialization with context-sensitive profile
Summary:
AutoFDO's sample profile loader processes functions in arbitrary source code order, so changing the order of two functions in the source code can change the inline decisions. This also prevents the use of a context-sensitive profile to do specialization while inlining. This commit enforces SCC top-down order for the sample profile loader. With this change, we can now do specialization, as illustrated by the added test case:
Suppose we have the call paths A->B->C and D->B->C. We want to inline C into B when the root inliner is B, but not when the root inliner is A or D; this is not possible without enforcing top-down order. For example, once C is inlined into B, A and D can only choose to inline (B->C) as a whole or nothing, but what we want is to inline only B into A and D, not its transitive callee C. If we process functions in top-down order, this is no longer a problem, which is what this commit does.
This change is guarded by a new switch, "-sample-profile-top-down-load" (off by default), for tuning, and it depends on D70653. Eventually, top-down can become the default order for the sample profile loader.
Reviewers: wmi, davidxl
Subscribers: hiraditya, llvm-commits, tejohnson
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70655
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Transforms/IPO/SampleProfile.cpp | 55 |
1 files changed, 46 insertions, 9 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 0a3e6ada58b..c11eeda2aa7 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" +#include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -33,6 +34,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -142,6 +145,11 @@ static cl::opt<bool> ProfileMergeInlinee( cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site.")); +static cl::opt<bool> ProfileTopDownLoad( + "sample-profile-top-down-load", cl::Hidden, cl::init(false), + cl::desc("Do profile annotation and inlining for functions in top-down " + "order of call graph during sample profile loading.")); + namespace { using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>; @@ -291,7 +299,7 @@ public: bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM, - ProfileSummaryInfo *_PSI); + ProfileSummaryInfo *_PSI, CallGraph *CG); void dump() { Reader->dump(); } @@ -323,6 +331,7 @@ protected: void propagateWeights(Function &F); uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(Function &F); + std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG); bool propagateThroughEdges(Function &F, bool UpdateBlockCount); void computeDominanceAndLoopInfo(Function &F); void clearFunctionData(); @@ -1696,6 +1705,33 @@ INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile", "Sample Profile loader", false, false) +std::vector<Function *> +SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { + std::vector<Function *> FunctionOrderList; + FunctionOrderList.reserve(M.size()); + + if (!ProfileTopDownLoad || CG == nullptr) { + for (Function &F : M) + if (!F.isDeclaration()) + FunctionOrderList.push_back(&F); + return FunctionOrderList; + } + + assert(&CG->getModule() == &M); + scc_iterator<CallGraph *> CGI = scc_begin(CG); + while (!CGI.isAtEnd()) { + for (CallGraphNode *node : *CGI) { + auto F = node->getFunction(); + if (F && !F->isDeclaration()) + FunctionOrderList.push_back(F); + } + ++CGI; + } + + std::reverse(FunctionOrderList.begin(), FunctionOrderList.end()); + return FunctionOrderList; +} + bool SampleProfileLoader::doInitialization(Module &M) { auto &Ctx = M.getContext(); @@ -1733,7 +1769,7 @@ ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) { } bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, - ProfileSummaryInfo *_PSI) { + ProfileSummaryInfo *_PSI, CallGraph *CG) { GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); if (!ProfileIsValid) return false; @@ -1768,11 +1804,11 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, } bool retval = false; - for (auto &F : M) - if (!F.isDeclaration()) { - clearFunctionData(); - retval |= runOnFunction(F, AM); - } + for (auto F : buildFunctionOrder(M, CG)) { + assert(!F->isDeclaration()); + clearFunctionData(); + retval |= runOnFunction(*F, AM); + } // Account for cold calls not inlined.... 
for (const std::pair<Function *, NotInlinedProfileInfo> &pair : @@ -1787,7 +1823,7 @@ bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) { TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); ProfileSummaryInfo *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - return SampleLoader.runOnModule(M, nullptr, PSI); + return SampleLoader.runOnModule(M, nullptr, PSI, nullptr); } bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) { @@ -1871,7 +1907,8 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, SampleLoader.doInitialization(M); ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); - if (!SampleLoader.runOnModule(M, &AM, PSI)) + CallGraph &CG = AM.getResult<CallGraphAnalysis>(M); + if (!SampleLoader.runOnModule(M, &AM, PSI, &CG)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); |

