diff options
-rw-r--r-- | llvm/include/llvm/Transforms/Scalar/LoopLoadElimination.h | 30 | ||||
-rw-r--r-- | llvm/lib/Passes/PassBuilder.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/Passes/PassRegistry.def | 1 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp | 87 | ||||
-rw-r--r-- | llvm/test/Other/new-pm-defaults.ll | 2 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopLoadElim/backward.ll | 1 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopLoadElim/forward.ll | 1 |
7 files changed, 99 insertions, 28 deletions
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopLoadElimination.h b/llvm/include/llvm/Transforms/Scalar/LoopLoadElimination.h new file mode 100644 index 00000000000..7a007a7e822 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/LoopLoadElimination.h @@ -0,0 +1,30 @@ +//===---- LoopLoadElimination.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This header defines the LoopLoadEliminationPass object. This pass forwards +/// loaded values around loop backedges to allow their use in subsequent +/// iterations. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_LOOPLOADELIMINATION_H +#define LLVM_TRANSFORMS_SCALAR_LOOPLOADELIMINATION_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// Pass to forward loads in a loop around the backedge to subsequent +/// iterations. +struct LoopLoadEliminationPass : public PassInfoMixin<LoopLoadEliminationPass> { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} + +#endif // LLVM_TRANSFORMS_SCALAR_LOOPLOADELIMINATION_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d9ce15dc377..efdb54adf03 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -104,6 +104,7 @@ #include "llvm/Transforms/Scalar/LoopDistribute.h" #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h" +#include "llvm/Transforms/Scalar/LoopLoadElimination.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopPredication.h" #include "llvm/Transforms/Scalar/LoopRotation.h" @@ -504,7 +505,9 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, // Now run the core loop vectorizer. OptimizePM.addPass(LoopVectorizePass()); - // FIXME: Need to port Loop Load Elimination and add it here. + // Eliminate loads by forwarding stores from the previous iteration to loads + // of the current iteration. + OptimizePM.addPass(LoopLoadEliminationPass()); // Cleanup after the loop optimization passes. OptimizePM.addPass(InstCombinePass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 23a932fe1a8..5b2e63e8501 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -170,6 +170,7 @@ FUNCTION_PASS("jump-threading", JumpThreadingPass()) FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) FUNCTION_PASS("lcssa", LCSSAPass()) FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass()) +FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass()) FUNCTION_PASS("loop-distribute", LoopDistributePass()) FUNCTION_PASS("loop-vectorize", LoopVectorizePass()) FUNCTION_PASS("print", PrintFunctionPass(dbgs())) diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index 8fb580183e3..b44cca4a90f 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -20,13 +20,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Scalar/LoopLoadElimination.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" @@ -45,9 +46,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/LoopVersioning.h" -#include <forward_list> -#include <cassert> #include <algorithm> +#include <cassert> +#include <forward_list> #include <set> #include <tuple> #include <utility> @@ -558,6 +559,32 @@ private: PredicatedScalarEvolution PSE; }; +static bool +eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, + function_ref<const LoopAccessInfo &(Loop &)> GetLAI) { + // Build up a worklist of inner-loops to transform to avoid iterator + // invalidation. + // FIXME: This logic comes from other passes that actually change the loop + // nest structure. It isn't clear this is necessary (or useful) for a pass + // which merely optimizes the use of loads in a loop. + SmallVector<Loop *, 8> Worklist; + + for (Loop *TopLevelLoop : LI) + for (Loop *L : depth_first(TopLevelLoop)) + // We only handle inner-most loops. + if (L->empty()) + Worklist.push_back(L); + + // Now walk the identified inner loops. + bool Changed = false; + for (Loop *L : Worklist) { + // The actual work is performed by LoadEliminationForLoop. + LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT); + Changed |= LEL.processLoop(); + } + return Changed; +} + /// \brief The pass. Most of the work is delegated to the per-loop /// LoadEliminationForLoop class. class LoopLoadElimination : public FunctionPass { @@ -570,32 +597,14 @@ public: if (skipFunction(F)) return false; - auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>(); - auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - - // Build up a worklist of inner-loops to vectorize. This is necessary as the - // act of distributing a loop creates new loops and can invalidate iterators - // across the loops. - SmallVector<Loop *, 8> Worklist; - - for (Loop *TopLevelLoop : *LI) - for (Loop *L : depth_first(TopLevelLoop)) - // We only handle inner-most loops. - if (L->empty()) - Worklist.push_back(L); - - // Now walk the identified inner loops. - bool Changed = false; - for (Loop *L : Worklist) { - const LoopAccessInfo &LAI = LAA->getInfo(L); - // The actual work is performed by LoadEliminationForLoop. - LoadEliminationForLoop LEL(L, LI, LAI, DT); - Changed |= LEL.processLoop(); - } + auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>(); + auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); // Process each loop nest in the function. - return Changed; + return eliminateLoadsAcrossLoops( + F, LI, DT, + [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); }); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -631,4 +640,28 @@ FunctionPass *createLoopLoadEliminationPass() { return new LoopLoadElimination(); } +PreservedAnalyses LoopLoadEliminationPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F); + auto &LI = AM.getResult<LoopAnalysis>(F); + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + auto &DT = AM.getResult<DominatorTreeAnalysis>(F); + auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + auto &AA = AM.getResult<AAManager>(F); + auto &AC = AM.getResult<AssumptionAnalysis>(F); + + auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); + bool Changed = eliminateLoadsAcrossLoops( + F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & { + LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI}; + return LAM.getResult<LoopAccessAnalysis>(L, AR); + }); + + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + return PA; +} + } // end namespace llvm diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 39ba2d2f77e..56ee847fd29 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -134,6 +134,8 @@ ; CHECK-O-NEXT: Running pass: LoopVectorizePass ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis ; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis +; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass +; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: SLPVectorizerPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass diff --git a/llvm/test/Transforms/LoopLoadElim/backward.ll b/llvm/test/Transforms/LoopLoadElim/backward.ll index 7c750a51a2a..c0cec75bdd3 100644 --- a/llvm/test/Transforms/LoopLoadElim/backward.ll +++ b/llvm/test/Transforms/LoopLoadElim/backward.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-load-elim -S < %s | FileCheck %s +; RUN: opt -passes=loop-load-elim -S < %s | FileCheck %s ; Simple st->ld forwarding derived from a lexical backward dep. ; diff --git a/llvm/test/Transforms/LoopLoadElim/forward.ll b/llvm/test/Transforms/LoopLoadElim/forward.ll index 9a0e03a317c..0b270cab3ed 100644 --- a/llvm/test/Transforms/LoopLoadElim/forward.ll +++ b/llvm/test/Transforms/LoopLoadElim/forward.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-load-elim -S < %s | FileCheck %s +; RUN: opt -passes=loop-load-elim -S < %s | FileCheck %s ; Simple st->ld forwarding derived from a lexical forward dep. ; |