author    Geoff Berry <gberry@codeaurora.org>    2017-06-28 18:53:09 +0000
committer Geoff Berry <gberry@codeaurora.org>    2017-06-28 18:53:09 +0000
commit    378374d45735f2f1ca74e46988d003c17188d5ad (patch)
tree      d9852a821809b5f8b3ea2e87ef682fe76b737c52 /llvm/lib
parent    e0a68355f2fb4735a2dd5676ea456793f79b7cba (diff)
[AArch64][Falkor] Try to avoid exhausting HW prefetcher resources when unrolling.
Reviewers: t.p.northover, mcrosier

Subscribers: aemerson, rengolin, javed.absar, kristof.beyls, llvm-commits

Differential Revision: https://reviews.llvm.org/D34533

llvm-svn: 306584
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 59
1 file changed, 59 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index f5d038d52e1..d81ccbd39ad 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -20,6 +20,9 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64tti"
+static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
+ cl::init(true), cl::Hidden);
+
bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
@@ -645,6 +648,58 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
return ST->getMaxInterleaveFactor();
}
+// For Falkor, we want to avoid having too many strided loads in a loop since
+// that can exhaust the HW prefetcher resources. We adjust the unroller
+// MaxCount preference below to attempt to ensure unrolling doesn't create too
+// many strided loads.
+static void
+getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TargetTransformInfo::UnrollingPreferences &UP) {
+ const int MaxStridedLoads = 7;
+ auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
+ int StridedLoads = 0;
+ // FIXME? We could make this more precise by looking at the CFG and
+ // e.g. not counting loads in each side of an if-then-else diamond.
+ for (const auto BB : L->blocks()) {
+ for (auto &I : *BB) {
+ LoadInst *LMemI = dyn_cast<LoadInst>(&I);
+ if (!LMemI)
+ continue;
+
+ Value *PtrValue = LMemI->getPointerOperand();
+ if (L->isLoopInvariant(PtrValue))
+ continue;
+
+ const SCEV *LSCEV = SE.getSCEV(PtrValue);
+ const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
+ if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
+ continue;
+
+ // FIXME? We could take pairing of unrolled load copies into account
+ // by looking at the AddRec, but we would probably have to limit this
+ // to loops with no stores or other memory optimization barriers.
+ ++StridedLoads;
+ // We've seen enough strided loads that seeing more won't make a
+ // difference.
+ if (StridedLoads > MaxStridedLoads / 2)
+ return StridedLoads;
+ }
+ }
+ return StridedLoads;
+ };
+
+ int StridedLoads = countStridedLoads(L, SE);
+ DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
+ << " strided loads\n");
+ // Pick the largest power of 2 unroll count that won't result in too many
+ // strided loads.
+ if (StridedLoads) {
+ UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
+ DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to " << UP.MaxCount
+ << '\n');
+ }
+}
+
void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// Enable partial unrolling and runtime unrolling.
@@ -658,6 +713,10 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// Disable partial & runtime unrolling on -Os.
UP.PartialOptSizeThreshold = 0;
+
+ if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
+ EnableFalkorHWPFUnrollFix)
+ getFalkorUnrollingPreferences(L, SE, UP);
}
Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
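
Note on the heuristic: the cap the patch applies is UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads), i.e. the largest power-of-two unroll factor whose product with the per-iteration strided-load count stays at or below 7. The standalone C++ sketch below is not part of the patch; the helper name falkorUnrollCap and the use of std::bit_floor are illustrative stand-ins for the inline computation with LLVM's Log2_32. It shows the resulting caps for small load counts:

// Standalone illustration (not from the patch) of the Falkor unroll cap:
// MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads), i.e. the largest
// power of two whose product with the strided-load count stays <= 7.
#include <bit>
#include <cstdio>

// Hypothetical helper; the patch computes this inline in
// getFalkorUnrollingPreferences().
static unsigned falkorUnrollCap(unsigned StridedLoads,
                                unsigned MaxStridedLoads = 7) {
  if (StridedLoads == 0)
    return 0; // the patch leaves UP.MaxCount untouched when no loads are found
  // std::bit_floor(x) == 1 << Log2_32(x) for x >= 1.
  return std::bit_floor(MaxStridedLoads / StridedLoads);
}

int main() {
  // Expected caps: 1 load -> 4, 2 -> 2, 3 -> 2, 4 -> 1.
  for (unsigned Loads = 1; Loads <= 4; ++Loads)
    std::printf("strided loads: %u  unroll MaxCount: %u\n", Loads,
                falkorUnrollCap(Loads));
}

So a loop with a single strided load can still be unrolled up to 4x, while a loop that already has four or more strided loads gets MaxCount = 1, i.e. no partial unrolling.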