summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorLei Huang <lei@ca.ibm.com>2017-10-12 16:43:33 +0000
committerLei Huang <lei@ca.ibm.com>2017-10-12 16:43:33 +0000
commit0724fea2da637883f1461e12ff46d596a816f758 (patch)
tree167aa66c20867839db6bc0c1556d29ee9845e528 /llvm/lib
parentc8ffffe4625bc0b0f790824eaa2a591943dff862 (diff)
downloadbcm5719-llvm-0724fea2da637883f1461e12ff46d596a816f758.tar.gz
bcm5719-llvm-0724fea2da637883f1461e12ff46d596a816f758.zip
[PowerPC] Add profitablilty check for conversion to mtctr loops
Add profitability checks for modifying counted loops to use the mtctr instruction. The latency of mtctr is only justified if there are more than 4 comparisons that will be removed as a result. Usually counted loops are formed relatively early and before unrolling, so most low trip count loops often don't survive. However we want to ensure that if they do, we do not mistakenly update them to mtctr loops. Use CodeMetrics to ensure we are only doing this for small loops with small trip counts. Differential Revision: https://reviews.llvm.org/D38212 llvm-svn: 315592
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/PowerPC/PPCCTRLoops.cpp33
1 files changed, 32 insertions, 1 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 8d61e81b1fc..8784a831902 100644
--- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -26,12 +26,17 @@
#include "PPC.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "PPCTargetTransformInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
@@ -64,6 +69,13 @@ using namespace llvm;
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif
+// The latency of mtctr is only justified if there are more than 4
+// comparisons that will be removed as a result.
+static cl::opt<unsigned>
+SmallCTRLoopThreshold("min-ctr-loop-threshold", cl::init(4), cl::Hidden,
+ cl::desc("Loops with a constant trip count smaller than "
+ "this value will not use the count register."));
+
STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
namespace llvm {
@@ -95,6 +107,8 @@ namespace {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
}
private:
@@ -107,10 +121,12 @@ namespace {
const PPCTargetLowering *TLI;
const DataLayout *DL;
const TargetLibraryInfo *LibInfo;
+ const TargetTransformInfo *TTI;
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
bool PreserveLCSSA;
+ TargetSchedModel SchedModel;
};
char PPCCTRLoops::ID = 0;
@@ -179,6 +195,7 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DL = &F.getParent()->getDataLayout();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
@@ -462,10 +479,24 @@ bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
return false;
}
-
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
bool MadeChange = false;
+ // Do not convert small short loops to CTR loop.
+ unsigned ConstTripCount = SE->getSmallConstantTripCount(L);
+ if (ConstTripCount && ConstTripCount < SmallCTRLoopThreshold) {
+ SmallPtrSet<const Value *, 32> EphValues;
+ auto AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
+ *L->getHeader()->getParent());
+ CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
+ CodeMetrics Metrics;
+ for (BasicBlock *BB : L->blocks())
+ Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
+ // 6 is an approximate latency for the mtctr instruction.
+ if (Metrics.NumInsts <= (6 * SchedModel.getIssueWidth()))
+ return false;
+ }
+
// Process nested loops first.
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
MadeChange |= convertToCTRLoop(*I);
OpenPOWER on IntegriCloud