| author | Adam Nemet <anemet@apple.com> | 2016-03-18 00:27:38 +0000 |
|---|---|---|
| committer | Adam Nemet <anemet@apple.com> | 2016-03-18 00:27:38 +0000 |
| commit | 6d8beeca5302984e845d9c6d7bf0a9e4a5ca98f9 (patch) | |
| tree | b08c60faf3269ff2160ae9f7e5fd185f56d4d643 /llvm/lib/Transforms | |
| parent | 53e758fc55687de06d905d34e7cb58a9560093d2 (diff) | |
| download | bcm5719-llvm-6d8beeca5302984e845d9c6d7bf0a9e4a5ca98f9.tar.gz bcm5719-llvm-6d8beeca5302984e845d9c6d7bf0a9e4a5ca98f9.zip | |
[LoopDataPrefetch/Aarch64] Allow selective prefetching of large-strided accesses
Summary:
And use this TTI for Cyclone. As explained in the original RFC
(http://thread.gmane.org/gmane.comp.compilers.llvm.devel/92758), the HW
prefetcher works on strides up to 2KB, so software prefetching only pays off for larger strides.
I am also adding tests for this and the previous change (D17943):
* Cyclone prefetching accesses with a large stride
* Cyclone not prefetching accesses with a small stride
* Generic Aarch64 subtarget not prefetching either
Reviewers: hfinkel
Subscribers: aemerson, rengolin, llvm-commits, mzolotukhin
Differential Revision: http://reviews.llvm.org/D17945
llvm-svn: 263771
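
The target-side half of this change (wiring `getMinPrefetchStride()` up for Cyclone) lies outside the `llvm/lib/Transforms` diffstat below. A minimal sketch of what such a TTI override can look like follows; the class name `MyTargetTTIImpl`, the option name, and the 2048-byte value are assumptions for illustration, not the actual AArch64 code:

```cpp
// Sketch only: a hypothetical target overriding the TTI hook that
// LoopDataPrefetch queries. The real AArch64/Cyclone change is not shown
// in this diff; the names and the 2048-byte threshold are illustrative.
#include "llvm/Support/CommandLine.h"

using namespace llvm;

static cl::opt<unsigned> MinPrefetchStride(
    "my-target-min-prefetch-stride", cl::Hidden, cl::init(2048),
    cl::desc("Minimum byte stride before software prefetching pays off"));

struct MyTargetTTIImpl {
  // LoopDataPrefetch::isStrideLargeEnough() compares the access stride
  // against this value; leaving it at the default of 1 disables the check.
  unsigned getMinPrefetchStride() const { return MinPrefetchStride; }
};
```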
Diffstat (limited to 'llvm/lib/Transforms')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp | 25 |
1 file changed, 25 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 3d25e099c3e..8e0322150b3 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -73,6 +73,10 @@ namespace {
     bool runOnFunction(Function &F) override;
     bool runOnLoop(Loop *L);
 
+    /// \brief Check if the stride of the accesses is large enough to
+    /// warrant a prefetch.
+    bool isStrideLargeEnough(const SCEVAddRecExpr *AR);
+
   private:
     AssumptionCache *AC;
     LoopInfo *LI;
@@ -94,6 +98,22 @@ INITIALIZE_PASS_END(LoopDataPrefetch, "loop-data-prefetch",
 
 FunctionPass *llvm::createLoopDataPrefetchPass() { return new LoopDataPrefetch(); }
 
+bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) {
+  unsigned TargetMinStride = TTI->getMinPrefetchStride();
+  // No need to check if any stride goes.
+  if (TargetMinStride <= 1)
+    return true;
+
+  const auto *ConstStride = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+  // If MinStride is set, don't prefetch unless we can ensure that stride is
+  // larger.
+  if (!ConstStride)
+    return false;
+
+  unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
+  return TargetMinStride <= AbsStride;
+}
+
 bool LoopDataPrefetch::runOnFunction(Function &F) {
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
@@ -184,6 +204,11 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
       if (!LSCEVAddRec)
         continue;
 
+      // Check if the stride of the accesses is large enough to warrant a
+      // prefetch.
+      if (!isStrideLargeEnough(LSCEVAddRec))
+        continue;
+
       // We don't want to double prefetch individual cache lines. If this load
       // is known to be within one cache line of some other load that has
       // already been prefetched, then don't prefetch this one as well.
```
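
To make the effect of the new check concrete, here is a hypothetical pair of loops (not from the commit or its tests) and how they would fare on a target whose `getMinPrefetchStride()` returns 2048: the first access strides by `sizeof(Big)` = 4104 bytes per iteration and passes the check, the second strides by 64 bytes and is skipped.

```cpp
// Hypothetical example, assuming a target minimum prefetch stride of 2048
// bytes. Struct sizes are chosen so the byte stride of A[I].X is obvious.
struct Big   { char Pad[4096]; double X; };  // sizeof(Big)   == 4104
struct Small { char Pad[56];   double X; };  // sizeof(Small) == 64

double sumBig(const Big *A, int N) {
  double S = 0.0;
  for (int I = 0; I < N; ++I)
    S += A[I].X;  // SCEV step of 4104 bytes >= 2048: eligible for prefetching
  return S;
}

double sumSmall(const Small *A, int N) {
  double S = 0.0;
  for (int I = 0; I < N; ++I)
    S += A[I].X;  // SCEV step of 64 bytes < 2048: isStrideLargeEnough() fails
  return S;
}
```

Note that this check only gates the stride; whether any prefetch is emitted at all still depends on the rest of the target's prefetch tuning, which is presumably why the generic AArch64 subtarget in the tests above emits none.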

