diff options
| author | Adam Nemet <anemet@apple.com> | 2016-03-18 00:27:43 +0000 |
|---|---|---|
| committer | Adam Nemet <anemet@apple.com> | 2016-03-18 00:27:43 +0000 |
| commit | 709e3046ee3c473b373fe5ec61e4d6e467991898 (patch) | |
| tree | 48c80521787e640be1608d9991f919853facfb6f /llvm | |
| parent | 6d8beeca5302984e845d9c6d7bf0a9e4a5ca98f9 (diff) | |
| download | bcm5719-llvm-709e3046ee3c473b373fe5ec61e4d6e467991898.tar.gz bcm5719-llvm-709e3046ee3c473b373fe5ec61e4d6e467991898.zip | |
[LoopDataPrefetch] Add TTI to limit the number of iterations to prefetch ahead
Summary:
It can hurt performance to prefetch ahead too much. Be conservative for
now and don't prefetch ahead more than 3 iterations on Cyclone.
Reviewers: hfinkel
Subscribers: llvm-commits, mzolotukhin
Differential Revision: http://reviews.llvm.org/D17949
llvm-svn: 263772
Diffstat (limited to 'llvm')
7 files changed, 35 insertions, 1 deletion
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 76a4315c102..57d0cf47d6b 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -428,6 +428,11 @@ public: /// adding SW prefetches. The default is 1, i.e. prefetch with any stride. unsigned getMinPrefetchStride() const; + /// \return The maximum number of iterations to prefetch ahead. If the + /// required number of iterations is more than this number, no prefetching is + /// performed. + unsigned getMaxPrefetchIterationsAhead() const; + /// \return The maximum interleave factor that any transform should try to /// perform for this target. This number depends on the level of parallelism /// and the number of execution units in the CPU. @@ -624,6 +629,7 @@ public: virtual unsigned getCacheLineSize() = 0; virtual unsigned getPrefetchDistance() = 0; virtual unsigned getMinPrefetchStride() = 0; + virtual unsigned getMaxPrefetchIterationsAhead() = 0; virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, @@ -797,6 +803,9 @@ public: unsigned getMinPrefetchStride() override { return Impl.getMinPrefetchStride(); } + unsigned getMaxPrefetchIterationsAhead() override { + return Impl.getMaxPrefetchIterationsAhead(); + } unsigned getMaxInterleaveFactor(unsigned VF) override { return Impl.getMaxInterleaveFactor(VF); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 96dc9445f56..39eba4d6e58 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -270,6 +270,8 @@ public: unsigned getMinPrefetchStride() { return 1; } + unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; } + unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } unsigned 
getArithmeticInstrCost(unsigned Opcode, Type *Ty, diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 8fd9fbf8196..b64d4133420 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -227,6 +227,10 @@ unsigned TargetTransformInfo::getMinPrefetchStride() const { return TTIImpl->getMinPrefetchStride(); } +unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const { + return TTIImpl->getMaxPrefetchIterationsAhead(); +} + unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { return TTIImpl->getMaxInterleaveFactor(VF); } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index aee298998f3..2b3fae958f4 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -31,6 +31,13 @@ static cl::opt<unsigned> CycloneMinPrefetchStride( cl::desc("Min stride to add prefetches for Cyclone"), cl::init(2048), cl::Hidden); +// Be conservative for now and don't prefetch ahead too much since the loop +// may terminate early. +static cl::opt<unsigned> CycloneMaxPrefetchIterationsAhead( + "cyclone-max-prefetch-iters-ahead", + cl::desc("Max number of iterations to prefetch ahead on Cyclone"), + cl::init(3), cl::Hidden); + /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. 
@@ -602,3 +609,9 @@ unsigned AArch64TTIImpl::getMinPrefetchStride() { return CycloneMinPrefetchStride; return BaseT::getMinPrefetchStride(); } + +unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() { + if (ST->isCyclone()) + return CycloneMaxPrefetchIterationsAhead; + return BaseT::getMaxPrefetchIterationsAhead(); +} diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index a54db00e7d3..93a84b7a992 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -133,6 +133,8 @@ public: unsigned getPrefetchDistance(); unsigned getMinPrefetchStride(); + + unsigned getMaxPrefetchIterationsAhead(); /// @} }; diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp index 8e0322150b3..f55f3193dde 100644 --- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -171,6 +171,9 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) { if (!ItersAhead) ItersAhead = 1; + if (ItersAhead > TTI->getMaxPrefetchIterationsAhead()) + return MadeChange; + DEBUG(dbgs() << "Prefetching " << ItersAhead << " iterations ahead (loop size: " << LoopSize << ") in " << L->getHeader()->getParent()->getName() << ": " << *L); diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll index 4e0b9c0a224..437d9415f56 100644 --- a/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll +++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll @@ -1,4 +1,5 @@ -; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL +; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -cyclone-max-prefetch-iters-ahead=100 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL 
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL ; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" |

