diff options
| author | Geoff Berry <gberry@codeaurora.org> | 2017-06-28 18:53:09 +0000 |
|---|---|---|
| committer | Geoff Berry <gberry@codeaurora.org> | 2017-06-28 18:53:09 +0000 |
| commit | 378374d45735f2f1ca74e46988d003c17188d5ad (patch) | |
| tree | d9852a821809b5f8b3ea2e87ef682fe76b737c52 /llvm/lib | |
| parent | e0a68355f2fb4735a2dd5676ea456793f79b7cba (diff) | |
| download | bcm5719-llvm-378374d45735f2f1ca74e46988d003c17188d5ad.tar.gz bcm5719-llvm-378374d45735f2f1ca74e46988d003c17188d5ad.zip | |
[AArch64][Falkor] Try to avoid exhausting HW prefetcher resources when unrolling.
Reviewers: t.p.northover, mcrosier
Subscribers: aemerson, rengolin, javed.absar, kristof.beyls, llvm-commits
Differential Revision: https://reviews.llvm.org/D34533
llvm-svn: 306584
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 59 |
1 file changed, 59 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index f5d038d52e1..d81ccbd39ad 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -20,6 +20,9 @@ using namespace llvm;
 
 #define DEBUG_TYPE "aarch64tti"
 
+static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
+                                               cl::init(true), cl::Hidden);
+
 bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
                                          const Function *Callee) const {
   const TargetMachine &TM = getTLI()->getTargetMachine();
@@ -645,6 +648,58 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
   return ST->getMaxInterleaveFactor();
 }
 
+// For Falkor, we want to avoid having too many strided loads in a loop since
+// that can exhaust the HW prefetcher resources. We adjust the unroller
+// MaxCount preference below to attempt to ensure unrolling doesn't create too
+// many strided loads.
+static void
+getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+                              TargetTransformInfo::UnrollingPreferences &UP) {
+  const int MaxStridedLoads = 7;
+  auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
+    int StridedLoads = 0;
+    // FIXME? We could make this more precise by looking at the CFG and
+    // e.g. not counting loads in each side of an if-then-else diamond.
+    for (const auto BB : L->blocks()) {
+      for (auto &I : *BB) {
+        LoadInst *LMemI = dyn_cast<LoadInst>(&I);
+        if (!LMemI)
+          continue;
+
+        Value *PtrValue = LMemI->getPointerOperand();
+        if (L->isLoopInvariant(PtrValue))
+          continue;
+
+        const SCEV *LSCEV = SE.getSCEV(PtrValue);
+        const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
+        if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
+          continue;
+
+        // FIXME? We could take pairing of unrolled load copies into account
+        // by looking at the AddRec, but we would probably have to limit this
+        // to loops with no stores or other memory optimization barriers.
+        ++StridedLoads;
+        // We've seen enough strided loads that seeing more won't make a
+        // difference.
+        if (StridedLoads > MaxStridedLoads / 2)
+          return StridedLoads;
+      }
+    }
+    return StridedLoads;
+  };
+
+  int StridedLoads = countStridedLoads(L, SE);
+  DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
+               << " strided loads\n");
+  // Pick the largest power of 2 unroll count that won't result in too many
+  // strided loads.
+  if (StridedLoads) {
+    UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
+    DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to " << UP.MaxCount
+                 << '\n');
+  }
+}
+
 void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                              TTI::UnrollingPreferences &UP) {
   // Enable partial unrolling and runtime unrolling.
@@ -658,6 +713,10 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
 
   // Disable partial & runtime unrolling on -Os.
   UP.PartialOptSizeThreshold = 0;
+
+  if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
+      EnableFalkorHWPFUnrollFix)
+    getFalkorUnrollingPreferences(L, SE, UP);
 }
 
 Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
```

