Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/LoopPassManager.cpp        |   1
-rw-r--r--  llvm/lib/Passes/PassBuilder.cpp              | 293
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopRotation.cpp  |   6
3 files changed, 283 insertions, 17 deletions
diff --git a/llvm/lib/Analysis/LoopPassManager.cpp b/llvm/lib/Analysis/LoopPassManager.cpp
index 5b40a0beff7..deb68e75ded 100644
--- a/llvm/lib/Analysis/LoopPassManager.cpp
+++ b/llvm/lib/Analysis/LoopPassManager.cpp
@@ -50,6 +50,7 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
   PA.preserve<ScalarEvolutionAnalysis>();
   // TODO: What we really want to do here is preserve an AA category, but that
   // concept doesn't exist yet.
+  PA.preserve<AAManager>();
   PA.preserve<BasicAA>();
   PA.preserve<GlobalsAA>();
   PA.preserve<SCEVAA>();
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 95d4b64dc87..7ae9e97e5cd 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -140,6 +140,21 @@ using namespace llvm;
 
 static Regex DefaultAliasRegex("^(default|lto-pre-link|lto)<(O[0123sz])>$");
 
+static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
+  switch (Level) {
+  case PassBuilder::O0:
+  case PassBuilder::O1:
+  case PassBuilder::O2:
+  case PassBuilder::O3:
+    return false;
+
+  case PassBuilder::Os:
+  case PassBuilder::Oz:
+    return true;
+  }
+  llvm_unreachable("Invalid optimization level!");
+}
+
 namespace {
 
 /// \brief No-op module pass which does nothing.
@@ -252,35 +267,280 @@ void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
 #include "PassRegistry.def"
 }
 
-void PassBuilder::addPerModuleDefaultPipeline(ModulePassManager &MPM,
-                                              OptimizationLevel Level,
-                                              bool DebugLogging) {
-  // FIXME: Finish fleshing this out to match the legacy pipelines.
+FunctionPassManager
+PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
+                                                 bool DebugLogging) {
+  assert(Level != O0 && "Must request optimizations!");
+  FunctionPassManager FPM(DebugLogging);
+
+  // Form SSA out of local memory accesses after breaking apart aggregates into
+  // scalars.
+  FPM.addPass(SROA());
+
+  // Catch trivial redundancies.
+  FPM.addPass(EarlyCSEPass());
+
+  // Speculative execution if the target has divergent branches; otherwise nop.
+  FPM.addPass(SpeculativeExecutionPass());
+
+  // Optimize based on known information about branches, and cleanup afterward.
+  FPM.addPass(JumpThreadingPass());
+  FPM.addPass(CorrelatedValuePropagationPass());
+  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(InstCombinePass());
+
+  if (!isOptimizingForSize(Level))
+    FPM.addPass(LibCallsShrinkWrapPass());
+
+  FPM.addPass(TailCallElimPass());
+  FPM.addPass(SimplifyCFGPass());
+
+  // Form canonically associated expression trees, and simplify the trees using
+  // basic mathematical properties. For example, this will form (nearly)
+  // minimal multiplication trees.
+  FPM.addPass(ReassociatePass());
+
+  // Add the primary loop simplification pipeline.
+  // FIXME: Currently this is split into two loop pass pipelines because we run
+  // some function passes in between them. These can and should be replaced by
+  // loop pass equivalents but those aren't ready yet. Specifically,
+  // `SimplifyCFGPass` and `InstCombinePass` are used. We have
+  // `LoopSimplifyCFGPass` which isn't yet powerful enough, and the closest to
+  // the other we have is `LoopInstSimplify`.
+  LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging);
+
+  // Rotate Loop - disable header duplication at -Oz.
+  LPM1.addPass(LoopRotatePass(Level != Oz));
+  LPM1.addPass(LICMPass());
+  // FIXME: Enable these when the loop pass manager can support updating the
+  // loop nest after transformations and we finish porting the loop passes.
+#if 0
+  LPM1.addPass(LoopUnswitchPass(/* OptimizeForSize */ Level != O3));
+  LPM2.addPass(IndVarSimplifyPass());
+  LPM2.addPass(LoopIdiomPass());
+  LPM2.addPass(LoopDeletionPass());
+  LPM2.addPass(SimpleLoopUnrollPass());
+#endif
+  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1)));
+  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(InstCombinePass());
+  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2)));
+
+  // Eliminate redundancies.
+  if (Level != O1) {
+    // These passes add substantial compile time so skip them at O1.
+    FPM.addPass(MergedLoadStoreMotionPass());
+    FPM.addPass(GVN());
+  }
+
+  // Specially optimize memory movement as it doesn't look like dataflow in
+  // SSA.
+  FPM.addPass(MemCpyOptPass());
+
+  // Sparse conditional constant propagation.
+  // FIXME: It isn't clear why we do this *after* loop passes rather than
+  // before...
+  FPM.addPass(SCCPPass());
+
+  // Delete dead bit computations (instcombine runs after to fold away the dead
+  // computations, and then ADCE will run later to exploit any new DCE
+  // opportunities that creates).
+  FPM.addPass(BDCEPass());
+
+  // Run instcombine after redundancy and dead bit elimination to exploit
+  // opportunities opened up by them.
+  FPM.addPass(InstCombinePass());
+
+  // Re-consider control flow based optimizations after redundancy elimination,
+  // redo DCE, etc.
+  FPM.addPass(JumpThreadingPass());
+  FPM.addPass(CorrelatedValuePropagationPass());
+  FPM.addPass(DSEPass());
+  FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
+
+  // Finally, do an expensive DCE pass to catch all the dead code exposed by
+  // the simplifications and basic cleanup after all the simplifications.
+  FPM.addPass(ADCEPass());
+  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(InstCombinePass());
+
+  return FPM;
+}
+
+ModulePassManager
+PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
+                                           bool DebugLogging) {
+  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  ModulePassManager MPM(DebugLogging);
+
+  // Force any function attributes we want the rest of the pipeline to observe.
+  MPM.addPass(ForceFunctionAttrsPass());
+
+  // Do basic inference of function attributes from known properties of system
+  // libraries and other oracles.
+  MPM.addPass(InferFunctionAttrsPass());
+
+  // Create an early function pass manager to cleanup the output of the
+  // frontend.
   FunctionPassManager EarlyFPM(DebugLogging);
   EarlyFPM.addPass(SimplifyCFGPass());
   EarlyFPM.addPass(SROA());
   EarlyFPM.addPass(EarlyCSEPass());
   EarlyFPM.addPass(LowerExpectIntrinsicPass());
-
+  EarlyFPM.addPass(GVNHoistPass());
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
+
+  // Interprocedural constant propagation now that basic cleanup has occurred
+  // and prior to optimizing globals.
+  // FIXME: This position in the pipeline hasn't been carefully considered in
+  // years, it should be re-analyzed.
+  MPM.addPass(IPSCCPPass());
+
+  // Optimize globals to try and fold them into constants.
+  MPM.addPass(GlobalOptPass());
+
+  // Promote any localized globals to SSA registers.
+  // FIXME: Should this instead be a run of SROA?
+  // FIXME: We should probably run instcombine and simplify-cfg afterward to
+  // delete control flows that are dead once globals have been folded to
+  // constants.
+  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+  // Remove any dead arguments exposed by cleanups and constant folding of
+  // globals.
+  MPM.addPass(DeadArgumentEliminationPass());
+
+  // Create a small function pass pipeline to cleanup after all the global
+  // optimizations.
+  FunctionPassManager GlobalCleanupPM(DebugLogging);
+  GlobalCleanupPM.addPass(InstCombinePass());
+  GlobalCleanupPM.addPass(SimplifyCFGPass());
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
+
+  // FIXME: Enable this when cross-IR-unit analysis invalidation is working.
+#if 0
+  MPM.addPass(RequireAnalysisPass<GlobalsAA>());
+#endif
+
+  // Now begin the main postorder CGSCC pipeline.
+  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
+  // manager and trying to emulate its precise behavior. Much of this doesn't
+  // make a lot of sense and we should revisit the core CGSCC structure.
+  CGSCCPassManager MainCGPipeline(DebugLogging);
+
+  // Note: historically, the PruneEH pass was run first to deduce nounwind and
+  // generally clean up exception handling overhead. It isn't clear this is
+  // valuable as the inliner doesn't currently care whether it is inlining an
+  // invoke or a call.
+
+  // Run the inliner first. The theory is that we are walking bottom-up and so
+  // the callees have already been fully optimized, and we want to inline them
+  // into the callers so that our optimizations can reflect that.
+  // FIXME: Customize the threshold based on optimization level.
+  MainCGPipeline.addPass(InlinerPass());
+
+  // Now deduce any function attributes based on the current code.
+  MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+
+  // Lastly, add the core function simplification pipeline nested inside the
+  // CGSCC walk.
+  MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+      buildFunctionSimplificationPipeline(Level, DebugLogging)));
+
+  MPM.addPass(
+      createModuleToPostOrderCGSCCPassAdaptor(std::move(MainCGPipeline)));
+
+  // This ends the canonicalization and simplification phase of the pipeline.
+  // At this point, we expect to have canonical and simple IR which we begin
+  // *optimizing* for efficient execution going forward.
+
+  // Eliminate externally available functions now that inlining is over -- we
+  // won't emit these anyway.
+  MPM.addPass(EliminateAvailableExternallyPass());
+
+  // Do RPO function attribute inference across the module to forward-propagate
+  // attributes where applicable.
+  // FIXME: Is this really an optimization rather than a canonicalization?
+  MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+  // Recompute GlobalsAA here prior to function passes. This is particularly
+  // useful as the above will have inlined, DCE'ed, and function-attr
+  // propagated everything. We should at this point have a reasonably minimal
+  // and richly annotated call graph. By computing aliasing and mod/ref
+  // information for all local globals here, the late loop passes and notably
+  // the vectorizer will be able to use them to help recognize vectorizable
+  // memory operations.
+  // FIXME: Enable this once analysis invalidation is fully supported.
+#if 0
+  MPM.addPass(Require<GlobalsAA>());
+#endif
+
+  FunctionPassManager OptimizePM(DebugLogging);
+  OptimizePM.addPass(Float2IntPass());
+  // FIXME: We need to run some loop optimizations to re-rotate loops after
+  // simplify-cfg and others undo their rotation.
+
+  // Optimize the loop execution. These passes operate on entire loop nests
+  // rather than on each loop in an inside-out manner, and so they are actually
+  // function passes.
+  OptimizePM.addPass(LoopDistributePass());
+  OptimizePM.addPass(LoopVectorizePass());
+  // FIXME: Need to port Loop Load Elimination and add it here.
+  OptimizePM.addPass(InstCombinePass());
+
+  // Optimize parallel scalar instruction chains into SIMD instructions.
+  OptimizePM.addPass(SLPVectorizerPass());
+
+  // Cleanup after vectorizers.
+  OptimizePM.addPass(SimplifyCFGPass());
+  OptimizePM.addPass(InstCombinePass());
+
+  // Unroll small loops to hide loop backedge latency and saturate any parallel
+  // execution resources of an out-of-order processor.
+  // FIXME: Need to add once loop pass pipeline is available.
+
+  // FIXME: Add the loop sink pass when ported.
+
+  // FIXME: Add cleanup from the loop pass manager when we're forming LCSSA
+  // here.
+
+  // Now that we've vectorized and unrolled loops, we may have more refined
+  // alignment information, try to re-derive it here.
+  OptimizePM.addPass(AlignmentFromAssumptionsPass());
+
+  // Add the core optimizing pipeline.
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
+
+  // Now we need to do some global optimization transforms.
+  // FIXME: It would seem like these should come first in the optimization
+  // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
+  // ordering here.
+  MPM.addPass(GlobalDCEPass());
+  MPM.addPass(ConstantMergePass());
+
+  return MPM;
 }
 
-void PassBuilder::addLTOPreLinkDefaultPipeline(ModulePassManager &MPM,
-                                               OptimizationLevel Level,
-                                               bool DebugLogging) {
+ModulePassManager
+PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
+                                            bool DebugLogging) {
+  assert(Level != O0 && "Must request optimizations for the default pipeline!");
   // FIXME: We should use a customized pre-link pipeline!
-  addPerModuleDefaultPipeline(MPM, Level, DebugLogging);
+  return buildPerModuleDefaultPipeline(Level, DebugLogging);
 }
 
-void PassBuilder::addLTODefaultPipeline(ModulePassManager &MPM,
-                                        OptimizationLevel Level,
-                                        bool DebugLogging) {
+ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
+                                                       bool DebugLogging) {
+  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  ModulePassManager MPM(DebugLogging);
+
   // FIXME: Finish fleshing this out to match the legacy LTO pipelines.
   FunctionPassManager LateFPM(DebugLogging);
   LateFPM.addPass(InstCombinePass());
   LateFPM.addPass(SimplifyCFGPass());
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
+
+  return MPM;
 }
 
 static Optional<int> parseRepeatPassName(StringRef Name) {
@@ -500,14 +760,17 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
                       .Case("O3", O3)
                       .Case("Os", Os)
                       .Case("Oz", Oz);
+    if (L == O0)
+      // At O0 we do nothing at all!
+      return true;
     if (Matches[1] == "default") {
-      addPerModuleDefaultPipeline(MPM, L, DebugLogging);
+      MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
     } else if (Matches[1] == "lto-pre-link") {
-      addLTOPreLinkDefaultPipeline(MPM, L, DebugLogging);
+      MPM.addPass(buildLTOPreLinkDefaultPipeline(L, DebugLogging));
     } else {
       assert(Matches[1] == "lto" && "Not one of the matched options!");
-      addLTODefaultPipeline(MPM, L, DebugLogging);
+      MPM.addPass(buildLTODefaultPipeline(L, DebugLogging));
     }
     return true;
   }
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 32d2caa78da..0225cc32570 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -622,7 +622,8 @@ bool LoopRotate::processLoop(Loop *L) {
   return MadeChange;
 }
 
-LoopRotatePass::LoopRotatePass() {}
+LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication)
+    : EnableHeaderDuplication(EnableHeaderDuplication) {}
 
 PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM) {
   auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
@@ -636,7 +637,8 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM) {
   // Optional analyses.
   auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
   auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
-  LoopRotate LR(DefaultRotationThreshold, LI, TTI, AC, DT, SE);
+  int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0;
+  LoopRotate LR(Threshold, LI, TTI, AC, DT, SE);
 
   bool Changed = LR.processLoop(&L);
   if (!Changed)
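
Usage note: the new build* entry points return a fresh pass manager rather than appending to one passed by reference, so a driver composes them explicitly. Below is a minimal sketch of driving the O2 pipeline from C++, assuming a populated llvm::Module named Mod; the cross-IR-unit proxy registrations between the four analysis managers (the boilerplate opt's NewPMDriver performs) are elided, and the variable names are illustrative, not part of this patch.

  // Sketch only: analysis-manager setup mirroring opt's new-PM driver;
  // proxy registrations between the four managers are elided here.
  LoopAnalysisManager LAM(/*DebugLogging*/ false);
  FunctionAnalysisManager FAM(/*DebugLogging*/ false);
  CGSCCAnalysisManager CGAM(/*DebugLogging*/ false);
  ModuleAnalysisManager MAM(/*DebugLogging*/ false);

  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);

  // Build the O2 pipeline introduced by this patch and run it over Mod.
  ModulePassManager MPM =
      PB.buildPerModuleDefaultPipeline(PassBuilder::O2, /*DebugLogging*/ false);
  MPM.run(Mod, MAM);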

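The same pipelines are also reachable textually through the default, lto-pre-link, and lto aliases matched by DefaultAliasRegex, and the new early return makes default<O0> parse successfully while adding no passes at all. A hedged sketch, reusing PB from the snippet above; this is the path exercised by `opt -passes='default<O2>'`.

  // Equivalent to calling buildPerModuleDefaultPipeline(O2) directly.
  ModulePassManager MPM2(/*DebugLogging*/ false);
  bool Parsed = PB.parsePassPipeline(MPM2, "default<O2>");
  assert(Parsed && "the alias should match DefaultAliasRegex");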

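Finally, the LoopRotation change turns header duplication into a constructor knob: disabling it collapses the rotation threshold to 0, so a loop is rotated only when no instructions must be duplicated out of its header. A sketch of using the knob outside the default pipeline, where Level, FPM, and the surrounding pipeline are assumed to be in scope.

  // Mirror the default pipeline's policy: keep header duplication at every
  // level except minimum-size optimization (-Oz).
  LoopPassManager LPM(/*DebugLogging*/ false);
  LPM.addPass(LoopRotatePass(/*EnableHeaderDuplication*/ Level != PassBuilder::Oz));
  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));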