summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Analysis/LoopPassManager.cpp1
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp293
-rw-r--r--llvm/lib/Transforms/Scalar/LoopRotation.cpp6
3 files changed, 283 insertions, 17 deletions
diff --git a/llvm/lib/Analysis/LoopPassManager.cpp b/llvm/lib/Analysis/LoopPassManager.cpp
index 5b40a0beff7..deb68e75ded 100644
--- a/llvm/lib/Analysis/LoopPassManager.cpp
+++ b/llvm/lib/Analysis/LoopPassManager.cpp
@@ -50,6 +50,7 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PA.preserve<ScalarEvolutionAnalysis>();
// TODO: What we really want to do here is preserve an AA category, but that
// concept doesn't exist yet.
+ PA.preserve<AAManager>();
PA.preserve<BasicAA>();
PA.preserve<GlobalsAA>();
PA.preserve<SCEVAA>();
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 95d4b64dc87..7ae9e97e5cd 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -140,6 +140,21 @@ using namespace llvm;
static Regex DefaultAliasRegex("^(default|lto-pre-link|lto)<(O[0123sz])>$");
+static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
+ switch (Level) {
+ case PassBuilder::O0:
+ case PassBuilder::O1:
+ case PassBuilder::O2:
+ case PassBuilder::O3:
+ return false;
+
+ case PassBuilder::Os:
+ case PassBuilder::Oz:
+ return true;
+ }
+ llvm_unreachable("Invalid optimization level!");
+}
+
namespace {
/// \brief No-op module pass which does nothing.
@@ -252,35 +267,280 @@ void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
#include "PassRegistry.def"
}
-void PassBuilder::addPerModuleDefaultPipeline(ModulePassManager &MPM,
- OptimizationLevel Level,
- bool DebugLogging) {
- // FIXME: Finish fleshing this out to match the legacy pipelines.
+FunctionPassManager
+PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
+ bool DebugLogging) {
+ assert(Level != O0 && "Must request optimizations!");
+ FunctionPassManager FPM(DebugLogging);
+
+ // Form SSA out of local memory accesses after breaking apart aggregates into
+ // scalars.
+ FPM.addPass(SROA());
+
+ // Catch trivial redundancies
+ FPM.addPass(EarlyCSEPass());
+
+ // Speculative execution if the target has divergent branches; otherwise nop.
+ FPM.addPass(SpeculativeExecutionPass());
+
+ // Optimize based on known information about branches, and cleanup afterward.
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+
+ if (!isOptimizingForSize(Level))
+ FPM.addPass(LibCallsShrinkWrapPass());
+
+ FPM.addPass(TailCallElimPass());
+ FPM.addPass(SimplifyCFGPass());
+
+ // Form canonically associated expression trees, and simplify the trees using
+ // basic mathematical properties. For example, this will form (nearly)
+ // minimal multiplication trees.
+ FPM.addPass(ReassociatePass());
+
+ // Add the primary loop simplification pipeline.
+ // FIXME: Currently this is split into two loop pass pipelines because we run
+ // some function passes in between them. These can and should be replaced by
+ // loop pass equivalents but those aren't ready yet. Specifically,
+ // `SimplifyCFGPass` and `InstCombinePass` are used. We have
+ // `LoopSimplifyCFGPass` which isn't yet powerful enough, and the closest to
+ // the other we have is `LoopInstSimplify`.
+ LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging);
+
+ // Rotate Loop - disable header duplication at -Oz
+ LPM1.addPass(LoopRotatePass(Level != Oz));
+ LPM1.addPass(LICMPass());
+ // FIXME: Enable these when the loop pass manager can support updating the
+ // loop nest after transformations and we finish porting the loop passes.
+#if 0
+ LPM1.addPass(LoopUnswitchPass(/* OptimizeForSize */ Level != O3));
+ LPM2.addPass(IndVarSimplifyPass());
+ LPM2.addPass(LoopIdiomPass());
+ LPM2.addPass(LoopDeletionPass());
+ LPM2.addPass(SimpleLoopUnrollPass());
+#endif
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1)));
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2)));
+
+ // Eliminate redundancies.
+ if (Level != O1) {
+ // These passes add substantial compile time so skip them at O1.
+ FPM.addPass(MergedLoadStoreMotionPass());
+ FPM.addPass(GVN());
+ }
+
+ // Specially optimize memory movement as it doesn't look like dataflow in SSA.
+ FPM.addPass(MemCpyOptPass());
+
+ // Sparse conditional constant propagation.
+ // FIXME: It isn't clear why we do this *after* loop passes rather than
+ // before...
+ FPM.addPass(SCCPPass());
+
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+ // opportunities that creates).
+ FPM.addPass(BDCEPass());
+
+ // Run instcombine after redundancy and dead bit elimination to exploit
+ // opportunities opened up by them.
+ FPM.addPass(InstCombinePass());
+
+ // Re-consider control flow based optimizations after redundancy elimination,
+ // redo DCE, etc.
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ FPM.addPass(DSEPass());
+ FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
+
+ // Finally, do an expensive DCE pass to catch all the dead code exposed by
+ // the simplifications and basic cleanup after all the simplifications.
+ FPM.addPass(ADCEPass());
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+
+ return FPM;
+}
+
+ModulePassManager
+PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
+ bool DebugLogging) {
+ assert(Level != O0 && "Must request optimizations for the default pipeline!");
+ ModulePassManager MPM(DebugLogging);
+
+ // Force any function attributes we want the rest of the pipeline to observe.
+ MPM.addPass(ForceFunctionAttrsPass());
+
+ // Do basic inference of function attributes from known properties of system
+ // libraries and other oracles.
+ MPM.addPass(InferFunctionAttrsPass());
+
+ // Create an early function pass manager to cleanup the output of the
+ // frontend.
FunctionPassManager EarlyFPM(DebugLogging);
EarlyFPM.addPass(SimplifyCFGPass());
EarlyFPM.addPass(SROA());
EarlyFPM.addPass(EarlyCSEPass());
EarlyFPM.addPass(LowerExpectIntrinsicPass());
-
+ EarlyFPM.addPass(GVNHoistPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
+
+ // Interprocedural constant propagation now that basic cleanup has occurred
+ // and prior to optimizing globals.
+ // FIXME: This position in the pipeline hasn't been carefully considered in
+ // years, it should be re-analyzed.
+ MPM.addPass(IPSCCPPass());
+
+ // Optimize globals to try and fold them into constants.
+ MPM.addPass(GlobalOptPass());
+
+ // Promote any localized globals to SSA registers.
+ // FIXME: Should this instead be a run of SROA?
+ // FIXME: We should probably run instcombine and simplify-cfg afterward to
+ // delete control flows that are dead once globals have been folded to
+ // constants.
+ MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+ // Remove any dead arguments exposed by cleanups and constant folding
+ // globals.
+ MPM.addPass(DeadArgumentEliminationPass());
+
+ // Create a small function pass pipeline to cleanup after all the global
+ // optimizations.
+ FunctionPassManager GlobalCleanupPM(DebugLogging);
+ GlobalCleanupPM.addPass(InstCombinePass());
+ GlobalCleanupPM.addPass(SimplifyCFGPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
+
+ // FIXME: Enable this when cross-IR-unit analysis invalidation is working.
+#if 0
+ MPM.addPass(RequireAnalysisPass<GlobalsAA>());
+#endif
+
+ // Now begin the main postorder CGSCC pipeline.
+ // FIXME: The current CGSCC pipeline has its origins in the legacy pass
+ // manager and trying to emulate its precise behavior. Much of this doesn't
+ // make a lot of sense and we should revisit the core CGSCC structure.
+ CGSCCPassManager MainCGPipeline(DebugLogging);
+
+ // Note: historically, the PruneEH pass was run first to deduce nounwind and
+ // generally clean up exception handling overhead. It isn't clear this is
+ // valuable as the inliner doesn't currently care whether it is inlining an
+ // invoke or a call.
+
+ // Run the inliner first. The theory is that we are walking bottom-up and so
+ // the callees have already been fully optimized, and we want to inline them
+ // into the callers so that our optimizations can reflect that.
+ // FIXME: Customize the threshold based on optimization level.
+ MainCGPipeline.addPass(InlinerPass());
+
+ // Now deduce any function attributes based in the current code.
+ MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+
+ // Lastly, add the core function simplification pipeline nested inside the
+ // CGSCC walk.
+ MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+ buildFunctionSimplificationPipeline(Level, DebugLogging)));
+
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(std::move(MainCGPipeline)));
+
+ // This ends the canonicalization and simplification phase of the pipeline.
+ // At this point, we expect to have canonical and simple IR which we begin
+ // *optimizing* for efficient execution going forward.
+
+ // Eliminate externally available functions now that inlining is over -- we
+ // won't emit these anyways.
+ MPM.addPass(EliminateAvailableExternallyPass());
+
+ // Do RPO function attribute inference across the module to forward-propagate
+ // attributes where applicable.
+ // FIXME: Is this really an optimization rather than a canonicalization?
+ MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+ // Recompute GlobalsAA here prior to function passes. This is particularly
+ // useful as the above will have inlined, DCE'ed, and function-attr
+ // propagated everything. We should at this point have a reasonably minimal
+ // and richly annotated call graph. By computing aliasing and mod/ref
+ // information for all local globals here, the late loop passes and notably
+ // the vectorizer will be able to use them to help recognize vectorizable
+ // memory operations.
+ // FIXME: Enable this once analysis invalidation is fully supported.
+#if 0
+ MPM.addPass(Require<GlobalsAA>());
+#endif
+
+ FunctionPassManager OptimizePM(DebugLogging);
+ OptimizePM.addPass(Float2IntPass());
+ // FIXME: We need to run some loop optimizations to re-rotate loops after
+ // simplify-cfg and others undo their rotation.
+
+ // Optimize the loop execution. These passes operate on entire loop nests
+ // rather than on each loop in an inside-out manner, and so they are actually
+ // function passes.
+ OptimizePM.addPass(LoopDistributePass());
+ OptimizePM.addPass(LoopVectorizePass());
+ // FIXME: Need to port Loop Load Elimination and add it here.
+ OptimizePM.addPass(InstCombinePass());
+
+ // Optimize parallel scalar instruction chains into SIMD instructions.
+ OptimizePM.addPass(SLPVectorizerPass());
+
+ // Cleanup after vectorizers.
+ OptimizePM.addPass(SimplifyCFGPass());
+ OptimizePM.addPass(InstCombinePass());
+
+ // Unroll small loops to hide loop backedge latency and saturate any parallel
+ // execution resources of an out-of-order processor.
+ // FIXME: Need to add once loop pass pipeline is available.
+
+ // FIXME: Add the loop sink pass when ported.
+
+ // FIXME: Add cleanup from the loop pass manager when we're forming LCSSA
+ // here.
+
+ // Now that we've vectorized and unrolled loops, we may have more refined
+ // alignment information, try to re-derive it here.
+ OptimizePM.addPass(AlignmentFromAssumptionsPass());
+
+ // Add the core optimizing pipeline.
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
+
+ // Now we need to do some global optimization transforms.
+ // FIXME: It would seem like these should come first in the optimization
+ // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
+ // ordering here.
+ MPM.addPass(GlobalDCEPass());
+ MPM.addPass(ConstantMergePass());
+
+ return MPM;
}
-void PassBuilder::addLTOPreLinkDefaultPipeline(ModulePassManager &MPM,
- OptimizationLevel Level,
- bool DebugLogging) {
+ModulePassManager
+PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
+ bool DebugLogging) {
+ assert(Level != O0 && "Must request optimizations for the default pipeline!");
// FIXME: We should use a customized pre-link pipeline!
- addPerModuleDefaultPipeline(MPM, Level, DebugLogging);
+ return buildPerModuleDefaultPipeline(Level, DebugLogging);
}
-void PassBuilder::addLTODefaultPipeline(ModulePassManager &MPM,
- OptimizationLevel Level,
- bool DebugLogging) {
+ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
+ bool DebugLogging) {
+ assert(Level != O0 && "Must request optimizations for the default pipeline!");
+ ModulePassManager MPM(DebugLogging);
+
// FIXME: Finish fleshing this out to match the legacy LTO pipelines.
FunctionPassManager LateFPM(DebugLogging);
LateFPM.addPass(InstCombinePass());
LateFPM.addPass(SimplifyCFGPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
+
+ return MPM;
}
static Optional<int> parseRepeatPassName(StringRef Name) {
@@ -500,14 +760,17 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
.Case("O3", O3)
.Case("Os", Os)
.Case("Oz", Oz);
+ if (L == O0)
+ // At O0 we do nothing at all!
+ return true;
if (Matches[1] == "default") {
- addPerModuleDefaultPipeline(MPM, L, DebugLogging);
+ MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
} else if (Matches[1] == "lto-pre-link") {
- addLTOPreLinkDefaultPipeline(MPM, L, DebugLogging);
+ MPM.addPass(buildLTOPreLinkDefaultPipeline(L, DebugLogging));
} else {
assert(Matches[1] == "lto" && "Not one of the matched options!");
- addLTODefaultPipeline(MPM, L, DebugLogging);
+ MPM.addPass(buildLTODefaultPipeline(L, DebugLogging));
}
return true;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 32d2caa78da..0225cc32570 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -622,7 +622,8 @@ bool LoopRotate::processLoop(Loop *L) {
return MadeChange;
}
-LoopRotatePass::LoopRotatePass() {}
+LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication)
+ : EnableHeaderDuplication(EnableHeaderDuplication) {}
PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
@@ -636,7 +637,8 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM) {
// Optional analyses.
auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
- LoopRotate LR(DefaultRotationThreshold, LI, TTI, AC, DT, SE);
+ int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0;
+ LoopRotate LR(Threshold, LI, TTI, AC, DT, SE);
bool Changed = LR.processLoop(&L);
if (!Changed)
OpenPOWER on IntegriCloud