summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Scalar/LoopFuse.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopFuse.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopFuse.cpp180
1 files changed, 107 insertions, 73 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 0bc2bcff2ae..558400c24a1 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -66,7 +66,7 @@ using namespace llvm;
#define DEBUG_TYPE "loop-fusion"
-STATISTIC(FuseCounter, "Count number of loop fusions performed");
+STATISTIC(FuseCounter, "Loops fused");
STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");
STATISTIC(InvalidPreheader, "Loop has invalid preheader");
STATISTIC(InvalidHeader, "Loop has invalid header");
@@ -79,12 +79,12 @@ STATISTIC(MayThrowException, "Loop may throw an exception");
STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");
STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");
STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
-STATISTIC(InvalidTripCount,
- "Loop does not have invariant backedge taken count");
+STATISTIC(UnknownTripCount, "Loop has unknown trip count");
STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
-STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same");
-STATISTIC(NonAdjacent, "Candidates are not adjacent");
-STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader");
+STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
+STATISTIC(NonAdjacent, "Loops are not adjacent");
+STATISTIC(NonEmptyPreheader, "Loop has a non-empty preheader");
+STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
enum FusionDependenceAnalysisChoice {
FUSION_DEPENDENCE_ANALYSIS_SCEV,
@@ -151,11 +151,14 @@ struct FusionCandidate {
const DominatorTree *DT;
const PostDominatorTree *PDT;
+ OptimizationRemarkEmitter &ORE;
+
FusionCandidate(Loop *L, const DominatorTree *DT,
- const PostDominatorTree *PDT)
+ const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE)
: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
- Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) {
+ Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT),
+ ORE(ORE) {
// Walk over all blocks in the loop and check for conditions that may
// prevent fusion. For each block, walk over all instructions and collect
@@ -163,28 +166,28 @@ struct FusionCandidate {
// found, invalidate this object and return.
for (BasicBlock *BB : L->blocks()) {
if (BB->hasAddressTaken()) {
- AddressTakenBB++;
invalidate();
+ reportInvalidCandidate(AddressTakenBB);
return;
}
for (Instruction &I : *BB) {
if (I.mayThrow()) {
- MayThrowException++;
invalidate();
+ reportInvalidCandidate(MayThrowException);
return;
}
if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
if (SI->isVolatile()) {
- ContainsVolatileAccess++;
invalidate();
+ reportInvalidCandidate(ContainsVolatileAccess);
return;
}
}
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (LI->isVolatile()) {
- ContainsVolatileAccess++;
invalidate();
+ reportInvalidCandidate(ContainsVolatileAccess);
return;
}
}
@@ -227,6 +230,44 @@ struct FusionCandidate {
}
#endif
+ /// Determine if a fusion candidate (representing a loop) is eligible for
+ /// fusion. Note that this only checks whether a single loop can be fused - it
+ /// does not check whether it is *legal* to fuse two loops together.
+ bool isEligibleForFusion(ScalarEvolution &SE) const {
+ if (!isValid()) {
+ LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n");
+ if (!Preheader)
+ ++InvalidPreheader;
+ if (!Header)
+ ++InvalidHeader;
+ if (!ExitingBlock)
+ ++InvalidExitingBlock;
+ if (!ExitBlock)
+ ++InvalidExitBlock;
+ if (!Latch)
+ ++InvalidLatch;
+ if (L->isInvalid())
+ ++InvalidLoop;
+
+ return false;
+ }
+
+ // Require ScalarEvolution to be able to determine a trip count.
+ if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
+ LLVM_DEBUG(dbgs() << "Loop " << L->getName()
+ << " trip count not computable!\n");
+ return reportInvalidCandidate(UnknownTripCount);
+ }
+
+ if (!L->isLoopSimplifyForm()) {
+ LLVM_DEBUG(dbgs() << "Loop " << L->getName()
+ << " is not in simplified form!\n");
+ return reportInvalidCandidate(NotSimplifiedForm);
+ }
+
+ return true;
+ }
+
private:
// This is only used internally for now, to clear the MemWrites and MemReads
// list and setting Valid to false. I can't envision other uses of this right
@@ -239,6 +280,17 @@ private:
MemReads.clear();
Valid = false;
}
+
+ bool reportInvalidCandidate(llvm::Statistic &Stat) const {
+ using namespace ore;
+ assert(L && Preheader && "Fusion candidate not initialized properly!");
+ ++Stat;
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, Stat.getName(),
+ L->getStartLoc(), Preheader)
+ << "[" << Preheader->getParent()->getName() << "]: "
+ << "Loop is not a candidate for fusion: " << Stat.getDesc());
+ return false;
+ }
};
inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
@@ -391,16 +443,6 @@ static void printLoopVector(const LoopVector &LV) {
}
#endif
-static void reportLoopFusion(const FusionCandidate &FC0,
- const FusionCandidate &FC1,
- OptimizationRemarkEmitter &ORE) {
- using namespace ore;
- ORE.emit(
- OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent())
- << "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName()))
- << " with " << NV("Cand2", StringRef(FC1.Preheader->getName())));
-}
-
struct LoopFuser {
private:
// Sets of control flow equivalent fusion candidates for a given nest level.
@@ -506,53 +548,13 @@ private:
return false;
}
- /// Determine if a fusion candidate (representing a loop) is eligible for
- /// fusion. Note that this only checks whether a single loop can be fused - it
- /// does not check whether it is *legal* to fuse two loops together.
- bool eligibleForFusion(const FusionCandidate &FC) const {
- if (!FC.isValid()) {
- LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n");
- if (!FC.Preheader)
- InvalidPreheader++;
- if (!FC.Header)
- InvalidHeader++;
- if (!FC.ExitingBlock)
- InvalidExitingBlock++;
- if (!FC.ExitBlock)
- InvalidExitBlock++;
- if (!FC.Latch)
- InvalidLatch++;
- if (FC.L->isInvalid())
- InvalidLoop++;
-
- return false;
- }
-
- // Require ScalarEvolution to be able to determine a trip count.
- if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) {
- LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
- << " trip count not computable!\n");
- InvalidTripCount++;
- return false;
- }
-
- if (!FC.L->isLoopSimplifyForm()) {
- LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
- << " is not in simplified form!\n");
- NotSimplifiedForm++;
- return false;
- }
-
- return true;
- }
-
/// Iterate over all loops in the given loop set and identify the loops that
/// are eligible for fusion. Place all eligible fusion candidates into Control
/// Flow Equivalent sets, sorted by dominance.
void collectFusionCandidates(const LoopVector &LV) {
for (Loop *L : LV) {
- FusionCandidate CurrCand(L, &DT, &PDT);
- if (!eligibleForFusion(CurrCand))
+ FusionCandidate CurrCand(L, &DT, &PDT, ORE);
+ if (!CurrCand.isEligibleForFusion(SE))
continue;
// Go through each list in FusionCandidates and determine if L is control
@@ -664,14 +666,15 @@ private:
if (!identicalTripCounts(*FC0, *FC1)) {
LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
"counts. Not fusing.\n");
- NonEqualTripCount++;
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonEqualTripCount);
continue;
}
if (!isAdjacent(*FC0, *FC1)) {
LLVM_DEBUG(dbgs()
<< "Fusion candidates are not adjacent. Not fusing.\n");
- NonAdjacent++;
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, NonAdjacent);
continue;
}
@@ -683,12 +686,15 @@ private:
if (!isEmptyPreheader(*FC1)) {
LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
"preheader. Not fusing.\n");
- NonEmptyPreheader++;
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonEmptyPreheader);
continue;
}
if (!dependencesAllowFusion(*FC0, *FC1)) {
LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ InvalidDependencies);
continue;
}
@@ -696,9 +702,11 @@ private:
LLVM_DEBUG(dbgs()
<< "\tFusion appears to be "
<< (BeneficialToFuse ? "" : "un") << "profitable!\n");
- if (!BeneficialToFuse)
+ if (!BeneficialToFuse) {
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ FusionNotBeneficial);
continue;
-
+ }
// All analysis has completed and has determined that fusion is legal
// and profitable. At this point, start transforming the code and
// perform fusion.
@@ -710,15 +718,14 @@ private:
// Note this needs to be done *before* performFusion because
// performFusion will change the original loops, making it not
// possible to identify them after fusion is complete.
- reportLoopFusion(*FC0, *FC1, ORE);
+ reportLoopFusion<OptimizationRemark>(*FC0, *FC1, FuseCounter);
- FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT);
+ FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT, ORE);
FusedCand.verify();
- assert(eligibleForFusion(FusedCand) &&
+ assert(FusedCand.isEligibleForFusion(SE) &&
"Fused candidate should be eligible for fusion!");
// Notify the loop-depth-tree that these loops are not valid objects
- // anymore.
LDT.removeLoop(FC1->L);
CandidateSet.erase(FC0);
@@ -1137,6 +1144,33 @@ private:
return FC0.L;
}
+
+ /// Report details on loop fusion opportunities.
+ ///
+ /// This template function can be used to report both successful and missed
+ /// loop fusion opportunities, based on the RemarkKind. The RemarkKind should
+ /// be one of:
+ /// - OptimizationRemarkMissed to report when loop fusion is unsuccessful
+ /// given two valid fusion candidates.
+ /// - OptimizationRemark to report successful fusion of two fusion
+ /// candidates.
+ /// The remarks will be printed using the form:
+ /// <path/filename>:<line number>:<column number>: [<function name>]:
+ /// <Cand1 Preheader> and <Cand2 Preheader>: <Stat Description>
+ template <typename RemarkKind>
+ void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,
+ llvm::Statistic &Stat) {
+ assert(FC0.Preheader && FC1.Preheader &&
+ "Expecting valid fusion candidates");
+ using namespace ore;
+ ++Stat;
+ ORE.emit(RemarkKind(DEBUG_TYPE, Stat.getName(), FC0.L->getStartLoc(),
+ FC0.Preheader)
+ << "[" << FC0.Preheader->getParent()->getName()
+ << "]: " << NV("Cand1", StringRef(FC0.Preheader->getName()))
+ << " and " << NV("Cand2", StringRef(FC1.Preheader->getName()))
+ << ": " << Stat.getDesc());
+ }
};
struct LoopFuseLegacy : public FunctionPass {
OpenPOWER on IntegriCloud