summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h7
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp3
-rw-r--r--llvm/lib/Passes/PassRegistry.def2
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp104
-rw-r--r--llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll1
-rw-r--r--llvm/test/Transforms/LoopUnrollAndJam/disable.ll1
-rw-r--r--llvm/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll1
-rw-r--r--llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll1
8 files changed, 70 insertions, 50 deletions
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
index 7920269b0fb..23396352859 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h
@@ -15,9 +15,7 @@
namespace llvm {
-class Loop;
-struct LoopStandardAnalysisResults;
-class LPMUpdater;
+class Function;
/// A simple loop rotation transformation.
class LoopUnrollAndJamPass : public PassInfoMixin<LoopUnrollAndJamPass> {
@@ -25,8 +23,7 @@ class LoopUnrollAndJamPass : public PassInfoMixin<LoopUnrollAndJamPass> {
public:
explicit LoopUnrollAndJamPass(int OptLevel = 2) : OptLevel(OptLevel) {}
- PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR, LPMUpdater &U);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 64d748f94f1..646eb7d26cb 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -970,8 +970,7 @@ ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
// across the loop nests.
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll
if (EnableUnrollAndJam && PTO.LoopUnrolling) {
- OptimizePM.addPass(
- createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
+ OptimizePM.addPass(LoopUnrollAndJamPass(Level));
}
OptimizePM.addPass(LoopUnrollPass(
LoopUnrollOptions(Level, /*OnlyWhenForced=*/!PTO.LoopUnrolling,
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 66b38872b38..3efb57cd358 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -235,6 +235,7 @@ FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass())
FUNCTION_PASS("sroa", SROA())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
+FUNCTION_PASS("unroll-and-jam", LoopUnrollAndJamPass())
FUNCTION_PASS("verify", VerifierPass())
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
FUNCTION_PASS("verify<loops>", LoopVerifierPass())
@@ -307,7 +308,6 @@ LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass())
LOOP_PASS("strength-reduce", LoopStrengthReducePass())
LOOP_PASS("indvars", IndVarSimplifyPass())
LOOP_PASS("irce", IRCEPass())
-LOOP_PASS("unroll-and-jam", LoopUnrollAndJamPass())
LOOP_PASS("unroll-full", LoopFullUnrollPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(dbgs()))
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 558f63113db..92ad8dafa5a 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -427,51 +427,76 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
return UnrollResult;
}
+static bool tryToUnrollAndJamLoop(Function &F, DominatorTree &DT, LoopInfo &LI,
+ ScalarEvolution &SE,
+ const TargetTransformInfo &TTI,
+ AssumptionCache &AC, DependenceInfo &DI,
+ OptimizationRemarkEmitter &ORE,
+ int OptLevel) {
+ bool DidSomething = false;
+
+ // The loop unroll and jam pass requires loops to be in simplified form, and also needs LCSSA.
+ // Since simplification may add new inner loops, it has to run before the
+ // legality and profitability checks. This means running the loop unroll and jam pass
+ // will simplify all loops, regardless of whether anything end up being
+ // unroll and jammed.
+ for (auto &L : LI) {
+ DidSomething |=
+ simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */);
+ DidSomething |= formLCSSARecursively(*L, DT, &LI, &SE);
+ }
+
+ SmallPriorityWorklist<Loop *, 4> Worklist;
+ internal::appendLoopsToWorklist(reverse(LI), Worklist);
+ while (!Worklist.empty()) {
+ Loop *L = Worklist.pop_back_val();
+ formLCSSA(*L, DT, &LI, &SE);
+ LoopUnrollResult Result =
+ tryToUnrollAndJamLoop(L, DT, &LI, SE, TTI, AC, DI, ORE, OptLevel);
+ if (Result != LoopUnrollResult::Unmodified)
+ DidSomething = true;
+ }
+
+ return DidSomething;
+}
+
namespace {
-class LoopUnrollAndJam : public LoopPass {
+class LoopUnrollAndJam : public FunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
unsigned OptLevel;
- LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) {
+ LoopUnrollAndJam(int OptLevel = 2) : FunctionPass(ID), OptLevel(OptLevel) {
initializeLoopUnrollAndJamPass(*PassRegistry::getPassRegistry());
}
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
return false;
- Function &F = *L->getHeader()->getParent();
-
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
- // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
- // pass. Function analyses need to be preserved across loop transformations
- // but ORE cannot be preserved (see comment before the pass definition).
- OptimizationRemarkEmitter ORE(&F);
-
- LoopUnrollResult Result =
- tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
+ auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- if (Result == LoopUnrollResult::FullyUnrolled)
- LPM.markLoopAsDeleted(*L);
-
- return Result != LoopUnrollResult::Unmodified;
+ return tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
}
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DependenceAnalysisWrapperPass>();
- getLoopAnalysisUsage(AU);
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
};
@@ -481,10 +506,13 @@ char LoopUnrollAndJam::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
"Unroll and Jam loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam",
"Unroll and Jam loops", false, false)
@@ -492,26 +520,18 @@ Pass *llvm::createLoopUnrollAndJamPass(int OptLevel) {
return new LoopUnrollAndJam(OptLevel);
}
-PreservedAnalyses LoopUnrollAndJamPass::run(Loop &L, LoopAnalysisManager &AM,
- LoopStandardAnalysisResults &AR,
- LPMUpdater &) {
- const auto &FAM =
- AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
- Function *F = L.getHeader()->getParent();
-
- auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
- // FIXME: This should probably be optional rather than required.
- if (!ORE)
- report_fatal_error(
- "LoopUnrollAndJamPass: OptimizationRemarkEmitterAnalysis not cached at "
- "a higher level");
-
- DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
-
- LoopUnrollResult Result = tryToUnrollAndJamLoop(
- &L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, DI, *ORE, OptLevel);
-
- if (Result == LoopUnrollResult::Unmodified)
+PreservedAnalyses LoopUnrollAndJamPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+ TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
+ AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
+ DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ DependenceInfo &DI = AM.getResult<DependenceAnalysis>(F);
+ OptimizationRemarkEmitter &ORE =
+ AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+ if (!tryToUnrollAndJamLoop(F, DT, LI, SE, TTI, AC, DI, ORE, OptLevel))
return PreservedAnalyses::all();
return getLoopPassPreservedAnalyses();
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll b/llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll
index 890683043b4..d584238745c 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/dependencies.ll
@@ -1,4 +1,5 @@
; RUN: opt -basicaa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 < %s -S | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/disable.ll b/llvm/test/Transforms/LoopUnrollAndJam/disable.ll
index 4a00937b9c5..6e879896c55 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/disable.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/disable.ll
@@ -1,4 +1,5 @@
; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -pass-remarks=loop-unroll-and-jam < %s -S 2>&1 | FileCheck %s
+; RUN: opt -passes='unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 -pass-remarks=loop-unroll-and-jam < %s -S 2>&1 | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll b/llvm/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll
index 5254c779d0f..f2c12702f9e 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/pragma-explicit.ll
@@ -1,4 +1,5 @@
; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='unroll-and-jam' -allow-unroll-and-jam -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
index bdb47c27f1c..7580b50f870 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -basicaa -tbaa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='unroll-and-jam' -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
OpenPOWER on IntegriCloud