summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
author Adam Nemet <anemet@apple.com> 2016-03-18 00:27:43 +0000
committer Adam Nemet <anemet@apple.com> 2016-03-18 00:27:43 +0000
commit 709e3046ee3c473b373fe5ec61e4d6e467991898 (patch)
tree 48c80521787e640be1608d9991f919853facfb6f /llvm
parent 6d8beeca5302984e845d9c6d7bf0a9e4a5ca98f9 (diff)
download bcm5719-llvm-709e3046ee3c473b373fe5ec61e4d6e467991898.tar.gz
bcm5719-llvm-709e3046ee3c473b373fe5ec61e4d6e467991898.zip
[LoopDataPrefetch] Add TTI to limit the number of iterations to prefetch ahead
Summary:
It can hurt performance to prefetch ahead too much. Be conservative for now and don't prefetch ahead more than 3 iterations on Cyclone.

Reviewers: hfinkel

Subscribers: llvm-commits, mzolotukhin

Differential Revision: http://reviews.llvm.org/D17949

llvm-svn: 263772
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/include/llvm/Analysis/TargetTransformInfo.h 9
-rw-r--r-- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h 2
-rw-r--r-- llvm/lib/Analysis/TargetTransformInfo.cpp 4
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp 13
-rw-r--r-- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h 2
-rw-r--r-- llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp 3
-rw-r--r-- llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll 3
7 files changed, 35 insertions, 1 deletion
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 76a4315c102..57d0cf47d6b 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -428,6 +428,11 @@ public:
/// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
unsigned getMinPrefetchStride() const;
+ /// \return The maximum number of iterations to prefetch ahead. If the
+ /// required number of iterations is more than this number, no prefetching is
+ /// performed.
+ unsigned getMaxPrefetchIterationsAhead() const;
+
/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.
@@ -624,6 +629,7 @@ public:
virtual unsigned getCacheLineSize() = 0;
virtual unsigned getPrefetchDistance() = 0;
virtual unsigned getMinPrefetchStride() = 0;
+ virtual unsigned getMaxPrefetchIterationsAhead() = 0;
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
virtual unsigned
getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
@@ -797,6 +803,9 @@ public:
unsigned getMinPrefetchStride() override {
return Impl.getMinPrefetchStride();
}
+ unsigned getMaxPrefetchIterationsAhead() override {
+ return Impl.getMaxPrefetchIterationsAhead();
+ }
unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 96dc9445f56..39eba4d6e58 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -270,6 +270,8 @@ public:
unsigned getMinPrefetchStride() { return 1; }
+ unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }
+
unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8fd9fbf8196..b64d4133420 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -227,6 +227,10 @@ unsigned TargetTransformInfo::getMinPrefetchStride() const {
return TTIImpl->getMinPrefetchStride();
}
+unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
+ return TTIImpl->getMaxPrefetchIterationsAhead();
+}
+
unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index aee298998f3..2b3fae958f4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -31,6 +31,13 @@ static cl::opt<unsigned> CycloneMinPrefetchStride(
cl::desc("Min stride to add prefetches for Cyclone"),
cl::init(2048), cl::Hidden);
+// Be conservative for now and don't prefetch ahead too much since the loop
+// may terminate early.
+static cl::opt<unsigned> CycloneMaxPrefetchIterationsAhead(
+ "cyclone-max-prefetch-iters-ahead",
+ cl::desc("Max number of iterations to prefetch ahead on Cyclone"),
+ cl::init(3), cl::Hidden);
+
/// \brief Calculate the cost of materializing a 64-bit value. This helper
/// method might only calculate a fraction of a larger immediate. Therefore it
/// is valid to return a cost of ZERO.
@@ -602,3 +609,9 @@ unsigned AArch64TTIImpl::getMinPrefetchStride() {
return CycloneMinPrefetchStride;
return BaseT::getMinPrefetchStride();
}
+
+unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
+ if (ST->isCyclone())
+ return CycloneMaxPrefetchIterationsAhead;
+ return BaseT::getMaxPrefetchIterationsAhead();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a54db00e7d3..93a84b7a992 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -133,6 +133,8 @@ public:
unsigned getPrefetchDistance();
unsigned getMinPrefetchStride();
+
+ unsigned getMaxPrefetchIterationsAhead();
/// @}
};
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 8e0322150b3..f55f3193dde 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -171,6 +171,9 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
if (!ItersAhead)
ItersAhead = 1;
+ if (ItersAhead > TTI->getMaxPrefetchIterationsAhead())
+ return MadeChange;
+
DEBUG(dbgs() << "Prefetching " << ItersAhead
<< " iterations ahead (loop size: " << LoopSize << ") in "
<< L->getHeader()->getParent()->getName() << ": " << *L);
diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
index 4e0b9c0a224..437d9415f56 100644
--- a/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
+++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -cyclone-max-prefetch-iters-ahead=100 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
OpenPOWER on IntegriCloud