summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDehao Chen <dehao@google.com>2017-07-07 20:53:10 +0000
committerDehao Chen <dehao@google.com>2017-07-07 20:53:10 +0000
commit3a9861420cec131c6d87dbd73a4ecadb3107eb7f (patch)
treea099af04eb580f48dad0c57e7400049688c4b71a
parent3a11fdf8ce5099db375c0f0f2c210818013429a5 (diff)
downloadbcm5719-llvm-3a9861420cec131c6d87dbd73a4ecadb3107eb7f.tar.gz
bcm5719-llvm-3a9861420cec131c6d87dbd73a4ecadb3107eb7f.zip
Add sample PGO support to ThinLTO new pass manager.
Summary: For SamplePGO + ThinLTO, profile annotation is done twice: once in the PrepareForThinLTO pipeline and once in the backend compiler. The following changes are therefore needed in the PrepareForThinLTO phase to ensure the IR is not changed dramatically; otherwise the profile annotation will be inaccurate in the backend compiler. * disable the hot-caller heuristic * disable loop unrolling * disable indirect call promotion This will unblock the new PM testing for sample PGO (tools/clang/test/CodeGen/pgo-sample-thinlto-summary.c), which will be covered in another cfe patch. Reviewers: chandlerc, tejohnson, davidxl Reviewed By: tejohnson Subscribers: sanjoy, mehdi_amini, Prazek, inglorion, llvm-commits Differential Revision: https://reviews.llvm.org/D34895 llvm-svn: 307437
-rw-r--r--llvm/include/llvm/Passes/PassBuilder.h14
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp41
2 files changed, 40 insertions, 15 deletions
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index ff195839733..e3e03010f5e 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -188,9 +188,14 @@ public:
/// only intended for use when attempting to optimize code. If frontends
/// require some transformations for semantic reasons, they should explicitly
/// build them.
+ ///
+ /// \p PrepareForThinLTO indicates whether this is invoked in the
+ /// PrepareForThinLTO phase. Special handling is needed for sample PGO to
+ /// ensure the profile is accurate in the backend profile annotation phase.
FunctionPassManager
buildFunctionSimplificationPipeline(OptimizationLevel Level,
- bool DebugLogging = false);
+ bool DebugLogging = false,
+ bool PrepareForThinLTO = false);
/// Construct the core LLVM module canonicalization and simplification
/// pipeline.
@@ -205,9 +210,14 @@ public:
/// only intended for use when attempting to optimize code. If frontends
/// require some transformations for semantic reasons, they should explicitly
/// build them.
+ ///
+ /// \p PrepareForThinLTO indicates whether this is invoked in the
+ /// PrepareForThinLTO phase. Special handling is needed for sample PGO to
+ /// ensure the profile is accurate in the backend profile annotation phase.
ModulePassManager
buildModuleSimplificationPipeline(OptimizationLevel Level,
- bool DebugLogging = false);
+ bool DebugLogging = false,
+ bool PrepareForThinLTO = false);
/// Construct the core LLVM module optimization pipeline.
///
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 0380bd991d7..ed04a116683 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -307,7 +307,8 @@ void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
FunctionPassManager
PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
- bool DebugLogging) {
+ bool DebugLogging,
+ bool PrepareForThinLTO) {
assert(Level != O0 && "Must request optimizations!");
FunctionPassManager FPM(DebugLogging);
@@ -364,10 +365,11 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LPM2.addPass(IndVarSimplifyPass());
LPM2.addPass(LoopIdiomRecognizePass());
LPM2.addPass(LoopDeletionPass());
- // FIXME: The old pass manager has a hack to disable loop unrolling during
- // ThinLTO when using sample PGO. Need to either fix it or port some
- // workaround.
- LPM2.addPass(LoopUnrollPass::createFull(Level));
+ // Do not enable unrolling in the PrepareForThinLTO phase during sample PGO
+ // because it changes the IR in ways that make profile annotation in the
+ // backend compile inaccurate.
+ if (!PrepareForThinLTO || !PGOOpt || PGOOpt->SampleProfileFile.empty())
+ LPM2.addPass(LoopUnrollPass::createFull(Level));
// We provide the opt remark emitter pass for LICM to use. We only need to do
// this once as it is immutable.
@@ -490,7 +492,8 @@ getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
ModulePassManager
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
- bool DebugLogging) {
+ bool DebugLogging,
+ bool PrepareForThinLTO) {
ModulePassManager MPM(DebugLogging);
// Do basic inference of function attributes from known properties of system
@@ -544,8 +547,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile));
// Indirect call promotion that promotes intra-module targets only.
- MPM.addPass(PGOIndirectCallPromotion(
- false, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
+ // Do not enable it in the PrepareForThinLTO phase during sample PGO because
+ // it changes the IR so that backend profile annotation becomes inaccurate.
+ if (!PrepareForThinLTO || PGOOpt->SampleProfileFile.empty())
+ MPM.addPass(PGOIndirectCallPromotion(
+ false, PGOOpt && !PGOOpt->SampleProfileFile.empty()));
}
// Require the GlobalsAA analysis for the module so we can query it within
@@ -570,7 +576,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Run the inliner first. The theory is that we are walking bottom-up and so
// the callees have already been fully optimized, and we want to inline them
// into the callers so that our optimizations can reflect that.
- MainCGPipeline.addPass(InlinerPass(getInlineParamsFromOptLevel(Level)));
+ // For PrepareForThinLTO pass, we disable hot-caller heuristic for sample PGO
+ // because it makes profile annotation in the backend inaccurate.
+ InlineParams IP = getInlineParamsFromOptLevel(Level);
+ if (PrepareForThinLTO && PGOOpt && !PGOOpt->SampleProfileFile.empty())
+ IP.HotCallSiteThreshold = 0;
+ MainCGPipeline.addPass(InlinerPass(IP));
// Now deduce any function attributes based in the current code.
MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
@@ -583,7 +594,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Lastly, add the core function simplification pipeline nested inside the
// CGSCC walk.
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
- buildFunctionSimplificationPipeline(Level, DebugLogging)));
+ buildFunctionSimplificationPipeline(Level, DebugLogging,
+ PrepareForThinLTO)));
// We wrap the CGSCC pipeline in a devirtualization repeater. This will try
// to detect when we devirtualize indirect calls and iterate the SCC passes
@@ -726,7 +738,8 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
MPM.addPass(ForceFunctionAttrsPass());
// Add the core simplification pipeline.
- MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging));
+ MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging,
+ /*PrepareForThinLTO=*/false));
// Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging));
@@ -747,7 +760,8 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
// If we are planning to perform ThinLTO later, we don't bloat the code with
// unrolling/vectorization/... now. Just simplify the module as much as we
// can.
- MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging));
+ MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging,
+ /*PrepareForThinLTO=*/true));
// Run partial inlining pass to partially inline functions that have
// large bodies.
@@ -785,7 +799,8 @@ PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level,
!PGOOpt->ProfileUseFile.empty()));
// Add the core simplification pipeline.
- MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging));
+ MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging,
+ /*PrepareForThinLTO=*/false));
// Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging));
OpenPOWER on IntegriCloud