summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/InitializePasses.h2
-rw-r--r--llvm/include/llvm/Transforms/Scalar/LoopDistribute.h30
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp1
-rw-r--r--llvm/lib/Passes/PassRegistry.def1
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDistribute.cpp108
-rw-r--r--llvm/lib/Transforms/Scalar/Scalar.cpp2
-rw-r--r--llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll5
-rw-r--r--llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll5
8 files changed, 119 insertions, 35 deletions
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 86ea80eeba1..90ff82fe86d 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -177,7 +177,7 @@ void initializeLocalStackSlotPassPass(PassRegistry&);
void initializeLoopAccessLegacyAnalysisPass(PassRegistry&);
void initializeLoopDataPrefetchPass(PassRegistry&);
void initializeLoopDeletionLegacyPassPass(PassRegistry&);
-void initializeLoopDistributePass(PassRegistry&);
+void initializeLoopDistributeLegacyPass(PassRegistry&);
void initializeLoopExtractorPass(PassRegistry&);
void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&);
void initializeLoopInfoWrapperPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopDistribute.h b/llvm/include/llvm/Transforms/Scalar/LoopDistribute.h
new file mode 100644
index 00000000000..ddde5954c21
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/LoopDistribute.h
@@ -0,0 +1,30 @@
+//===- LoopDistribute.cpp - Loop Distribution Pass --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Loop Distribution Pass. Its main focus is to
+// distribute loops that cannot be vectorized due to dependence cycles. It
+// tries to isolate the offending dependences into a new loop allowing
+// vectorization of the remaining parts.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class LoopDistributePass : public PassInfoMixin<LoopDistributePass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPDISTRIBUTE_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index aa276be9d35..0e64df80f91 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -92,6 +92,7 @@
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopDeletion.h"
+#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
#include "llvm/Transforms/Scalar/LoopRotation.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 96f6cb810bd..b717057632b 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -151,6 +151,7 @@ FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass())
FUNCTION_PASS("jump-threading", JumpThreadingPass())
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
FUNCTION_PASS("lcssa", LCSSAPass())
+FUNCTION_PASS("loop-distribute", LoopDistributePass())
FUNCTION_PASS("loop-vectorize", LoopVectorizePass())
FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index f1e0ec40530..7eca28ed2bb 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -22,6 +22,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/STLExtras.h"
@@ -29,6 +30,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
@@ -597,7 +599,7 @@ public:
}
/// \brief Try to distribute an inner-most loop.
- bool processLoop(LoopAccessLegacyAnalysis *LAA) {
+ bool processLoop(std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
assert(L->empty() && "Only process inner loops.");
DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName()
@@ -610,7 +612,7 @@ public:
return fail("multiple exit blocks");
// LAA will check that we only have a single exiting block.
- LAI = &LAA->getInfo(L);
+ LAI = &GetLAA(*L);
// Currently, we only distribute to isolate the part of the loop with
// dependence cycles to enable partial vectorization.
@@ -860,19 +862,50 @@ private:
Optional<bool> IsForced;
};
+/// Shared implementation between new and old PMs.
+static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
+ ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
+ std::function<const LoopAccessInfo &(Loop &)> &GetLAA,
+ bool ProcessAllLoops) {
+ // Build up a worklist of inner-loops to vectorize. This is necessary as the
+ // act of distributing a loop creates new loops and can invalidate iterators
+ // across the loops.
+ SmallVector<Loop *, 8> Worklist;
+
+ for (Loop *TopLevelLoop : *LI)
+ for (Loop *L : depth_first(TopLevelLoop))
+ // We only handle inner-most loops.
+ if (L->empty())
+ Worklist.push_back(L);
+
+ // Now walk the identified inner loops.
+ bool Changed = false;
+ for (Loop *L : Worklist) {
+ LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
+
+ // If distribution was forced for the specific loop to be
+ // enabled/disabled, follow that. Otherwise use the global flag.
+ if (LDL.isForced().getValueOr(ProcessAllLoops))
+ Changed |= LDL.processLoop(GetLAA);
+ }
+
+ // Process each loop nest in the function.
+ return Changed;
+}
+
/// \brief The pass class.
-class LoopDistribute : public FunctionPass {
+class LoopDistributeLegacy : public FunctionPass {
public:
/// \p ProcessAllLoopsByDefault specifies whether loop distribution should be
/// performed by default. Pass -enable-loop-distribute={0,1} overrides this
/// default. We use this to keep LoopDistribution off by default when invoked
/// from the optimization pipeline but on when invoked explicitly from opt.
- LoopDistribute(bool ProcessAllLoopsByDefault = true)
+ LoopDistributeLegacy(bool ProcessAllLoopsByDefault = true)
: FunctionPass(ID), ProcessAllLoops(ProcessAllLoopsByDefault) {
// The default is set by the caller.
if (EnableLoopDistribute.getNumOccurrences() > 0)
ProcessAllLoops = EnableLoopDistribute;
- initializeLoopDistributePass(*PassRegistry::getPassRegistry());
+ initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
@@ -884,31 +917,10 @@ public:
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ std::function<const LoopAccessInfo &(Loop &)> GetLAA =
+ [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
- // Build up a worklist of inner-loops to vectorize. This is necessary as the
- // act of distributing a loop creates new loops and can invalidate iterators
- // across the loops.
- SmallVector<Loop *, 8> Worklist;
-
- for (Loop *TopLevelLoop : *LI)
- for (Loop *L : depth_first(TopLevelLoop))
- // We only handle inner-most loops.
- if (L->empty())
- Worklist.push_back(L);
-
- // Now walk the identified inner loops.
- bool Changed = false;
- for (Loop *L : Worklist) {
- LoopDistributeForLoop LDL(L, &F, LI, DT, SE, ORE);
-
- // If distribution was forced for the specific loop to be
- // enabled/disabled, follow that. Otherwise use the global flag.
- if (LDL.isForced().getValueOr(ProcessAllLoops))
- Changed |= LDL.processLoop(LAA);
- }
-
- // Process each loop nest in the function.
- return Changed;
+ return runImpl(F, LI, DT, SE, ORE, GetLAA, ProcessAllLoops);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -930,19 +942,49 @@ private:
};
} // anonymous namespace
-char LoopDistribute::ID;
+PreservedAnalyses LoopDistributePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ // FIXME: This does not currently match the behavior from the old PM.
+ // ProcessAllLoops with the old PM defaults to true when invoked from opt and
+ // false when invoked from the optimization pipeline.
+ bool ProcessAllLoops = false;
+ if (EnableLoopDistribute.getNumOccurrences() > 0)
+ ProcessAllLoops = EnableLoopDistribute;
+
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+ auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
+ std::function<const LoopAccessInfo &(Loop &)> GetLAA =
+ [&](Loop &L) -> const LoopAccessInfo & {
+ return LAM.getResult<LoopAccessAnalysis>(L);
+ };
+
+ bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA, ProcessAllLoops);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+char LoopDistributeLegacy::ID;
static const char ldist_name[] = "Loop Distribition";
-INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false)
+INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
+ false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false)
+INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
namespace llvm {
FunctionPass *createLoopDistributePass(bool ProcessAllLoopsByDefault) {
- return new LoopDistribute(ProcessAllLoopsByDefault);
+ return new LoopDistributeLegacy(ProcessAllLoopsByDefault);
}
}
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index dd87273e1c4..f235b12e49c 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -86,7 +86,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializePlaceBackedgeSafepointsImplPass(Registry);
initializePlaceSafepointsPass(Registry);
initializeFloat2IntLegacyPassPass(Registry);
- initializeLoopDistributePass(Registry);
+ initializeLoopDistributeLegacyPass(Registry);
initializeLoopLoadEliminationPass(Registry);
initializeLoopSimplifyCFGLegacyPassPass(Registry);
initializeLoopVersioningPassPass(Registry);
diff --git a/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll b/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll
index a67a71c5cfb..819c002e282 100644
--- a/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll
+++ b/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll
@@ -5,6 +5,11 @@
; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
+; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
+; RUN: -debug-only=block-freq -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS
+; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
+; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
+
; REQUIRES: asserts
; HOTNESS: block-frequency: forced
diff --git a/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll b/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll
index 21ddfcd6f52..8689bfeebc6 100644
--- a/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll
+++ b/llvm/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll
@@ -3,6 +3,11 @@
; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
+; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
+; RUN: -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS
+; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
+; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
+
; REQUIRES: asserts
; This is the input program:
OpenPOWER on IntegriCloud