[LoopDataPrefetch] Add TTI to limit the number of iterations to prefetch ahead

Summary:
It can hurt performance to prefetch ahead too much. Be conservative for
now and don't prefetch ahead more than 3 iterations on Cyclone.

Reviewers: hfinkel
Subscribers: llvm-commits, mzolotukhin
Differential Revision: http://reviews.llvm.org/D17949
llvm-svn: 263772
author: Adam Nemet <anemet@apple.com> 2016-03-18 00:27:43 +0000
committer: Adam Nemet <anemet@apple.com> 2016-03-18 00:27:43 +0000
commit: 709e3046ee3c473b373fe5ec61e4d6e467991898 (patch)
tree: 48c80521787e640be1608d9991f919853facfb6f /llvm
parent: 6d8beeca5302984e845d9c6d7bf0a9e4a5ca98f9 (diff)
download: bcm5719-llvm-709e3046ee3c473b373fe5ec61e4d6e467991898.tar.gz
bcm5719-llvm-709e3046ee3c473b373fe5ec61e4d6e467991898.zip
7 files changed, 35 insertions, 1 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 76a4315c102..57d0cf47d6b 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -428,6 +428,11 @@ public:
   /// adding SW prefetches.  The default is 1, i.e. prefetch with any stride.
   unsigned getMinPrefetchStride() const;
 
+  /// \return The maximum number of iterations to prefetch ahead.  If the
+  /// required number of iterations is more than this number, no prefetching is
+  /// performed.
+  unsigned getMaxPrefetchIterationsAhead() const;
+
   /// \return The maximum interleave factor that any transform should try to
   /// perform for this target. This number depends on the level of parallelism
   /// and the number of execution units in the CPU.
@@ -624,6 +629,7 @@ public:
   virtual unsigned getCacheLineSize() = 0;
   virtual unsigned getPrefetchDistance() = 0;
   virtual unsigned getMinPrefetchStride() = 0;
+  virtual unsigned getMaxPrefetchIterationsAhead() = 0;
   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
   virtual unsigned
   getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
@@ -797,6 +803,9 @@ public:
   unsigned getMinPrefetchStride() override {
     return Impl.getMinPrefetchStride();
   }
+  unsigned getMaxPrefetchIterationsAhead() override {
+    return Impl.getMaxPrefetchIterationsAhead();
+  }
   unsigned getMaxInterleaveFactor(unsigned VF) override {
     return Impl.getMaxInterleaveFactor(VF);
   }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 96dc9445f56..39eba4d6e58 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -270,6 +270,8 @@ public:
 
   unsigned getMinPrefetchStride() { return 1; }
 
+  unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }
+
   unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
 
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8fd9fbf8196..b64d4133420 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -227,6 +227,10 @@ unsigned TargetTransformInfo::getMinPrefetchStride() const {
   return TTIImpl->getMinPrefetchStride();
 }
 
+unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
+  return TTIImpl->getMaxPrefetchIterationsAhead();
+}
+
 unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
   return TTIImpl->getMaxInterleaveFactor(VF);
 }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index aee298998f3..2b3fae958f4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -31,6 +31,13 @@ static cl::opt<unsigned> CycloneMinPrefetchStride(
     cl::desc("Min stride to add prefetches for Cyclone"),
     cl::init(2048), cl::Hidden);
 
+// Be conservative for now and don't prefetch ahead too much since the loop
+// may terminate early.
+static cl::opt<unsigned> CycloneMaxPrefetchIterationsAhead(
+    "cyclone-max-prefetch-iters-ahead",
+    cl::desc("Max number of iterations to prefetch ahead on Cyclone"),
+    cl::init(3), cl::Hidden);
+
 /// \brief Calculate the cost of materializing a 64-bit value. This helper
 /// method might only calculate a fraction of a larger immediate. Therefore it
 /// is valid to return a cost of ZERO.
@@ -602,3 +609,9 @@ unsigned AArch64TTIImpl::getMinPrefetchStride() {
     return CycloneMinPrefetchStride;
   return BaseT::getMinPrefetchStride();
 }
+
+unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
+  if (ST->isCyclone())
+    return CycloneMaxPrefetchIterationsAhead;
+  return BaseT::getMaxPrefetchIterationsAhead();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a54db00e7d3..93a84b7a992 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -133,6 +133,8 @@ public:
   unsigned getPrefetchDistance();
 
   unsigned getMinPrefetchStride();
+
+  unsigned getMaxPrefetchIterationsAhead();
   /// @}
 };
 
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 8e0322150b3..f55f3193dde 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -171,6 +171,9 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
   if (!ItersAhead)
     ItersAhead = 1;
 
+  if (ItersAhead > TTI->getMaxPrefetchIterationsAhead())
+    return MadeChange;
+
   DEBUG(dbgs() << "Prefetching " << ItersAhead
                << " iterations ahead (loop size: " << LoopSize << ") in "
                << L->getHeader()->getParent()->getName() << ": " << *L);
diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
index 4e0b9c0a224..437d9415f56 100644
--- a/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
+++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
@@ -1,4 +1,5 @@
-; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -cyclone-max-prefetch-iters-ahead=100 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
 ; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
author	Adam Nemet <anemet@apple.com>	2016-03-18 00:27:43 +0000
committer	Adam Nemet <anemet@apple.com>	2016-03-18 00:27:43 +0000
commit	709e3046ee3c473b373fe5ec61e4d6e467991898 (patch)
tree	48c80521787e640be1608d9991f919853facfb6f /llvm
parent	6d8beeca5302984e845d9c6d7bf0a9e4a5ca98f9 (diff)
download	bcm5719-llvm-709e3046ee3c473b373fe5ec61e4d6e467991898.tar.gz bcm5719-llvm-709e3046ee3c473b373fe5ec61e4d6e467991898.zip