diff options
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCCTRLoops.cpp')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCCTRLoops.cpp | 33 |
1 files changed, 32 insertions, 1 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp index 8d61e81b1fc..8784a831902 100644 --- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -26,12 +26,17 @@ #include "PPC.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" +#include "PPCTargetTransformInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" @@ -64,6 +69,13 @@ using namespace llvm; static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); #endif +// The latency of mtctr is only justified if there are more than 4 +// comparisons that will be removed as a result. +static cl::opt<unsigned> +SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden, + cl::desc("Loops with a constant trip count smaller than " + "this value will not use the count register.")); + STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops"); namespace llvm { @@ -95,6 +107,8 @@ namespace { AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addRequired<ScalarEvolutionWrapperPass>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } private: @@ -107,10 +121,12 @@ namespace { const PPCTargetLowering *TLI; const DataLayout *DL; const TargetLibraryInfo *LibInfo; + const TargetTransformInfo *TTI; LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; bool PreserveLCSSA; + TargetSchedModel SchedModel; }; char PPCCTRLoops::ID = 0; @@ -179,6 +195,7 @@ bool PPCCTRLoops::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); DL = &F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); LibInfo = TLIP ? &TLIP->getTLI() : nullptr; @@ -462,10 +479,24 @@ bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) { return false; } - bool PPCCTRLoops::convertToCTRLoop(Loop *L) { bool MadeChange = false; + // Do not convert small short loops to CTR loop. + unsigned ConstTripCount = SE->getSmallConstantTripCount(L); + if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) { + SmallPtrSet<const Value *, 32> EphValues; + auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache( + *L->getHeader()->getParent()); + CodeMetrics::collectEphemeralValues(L, &AC, EphValues); + CodeMetrics Metrics; + for (BasicBlock *BB : L->blocks()) + Metrics.analyzeBasicBlock(BB, *TTI, EphValues); + // 6 is an approximate latency for the mtctr instruction. + if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth())) + return false; + } + // Process nested loops first. for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) { MadeChange |= convertToCTRLoop(*I); |