author     Sjoerd Meijer <sjoerd.meijer@arm.com>    2019-11-06 09:58:36 +0000
committer  Sjoerd Meijer <sjoerd.meijer@arm.com>    2019-11-06 10:14:20 +0000
commit     6c2a4f5ff93e16c3b86c18543e02a193ced2d956 (patch)
tree       d2cb3ef09bdcc0e963f75f4dd5cfa58705d450da /llvm/lib
parent     9577ee84e638530be7a310c9d50526a36e3c212e (diff)
download   bcm5719-llvm-6c2a4f5ff93e16c3b86c18543e02a193ced2d956.tar.gz
           bcm5719-llvm-6c2a4f5ff93e16c3b86c18543e02a193ced2d956.zip
[TTI][LV] preferPredicateOverEpilogue
We have two ways to steer creating a predicated vector body instead of a
scalar epilogue: to force this, we have 1) a command line option and 2) a
pragma. This adds a third: a target hook in TargetTransformInfo that can be
queried as to whether predication is preferred or not, which allows the
vectoriser to make the decision without it being forced by the user.
While this behaves as a non-functional change for now, it shows the required
TTI plumbing, the use of this new hook in the vectoriser, and the beginning of
an ARM MVE implementation. I will follow up on this with:
- a complete MVE implementation, see D69845;
- a patch to allow disabling this, i.e. we should respect
  "vector_predicate(disable)" and its corresponding loop hint.
Differential Revision: https://reviews.llvm.org/D69040
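For reference, a minimal sketch of the three ways to request predication that
the message above mentions. The command-line spelling is inferred from the
PreferPredicateOverEpilog variable used in the diff below, and the pragma is
the clang loop-hint form; both are given here as assumptions rather than
quoted from this commit:

// 1) Command line: assumed LoopVectorize option backing the
//    PreferPredicateOverEpilog flag tested in getScalarEpilogueLowering():
//      opt -loop-vectorize -prefer-predicate-over-epilog ...
//
// 2) Per-loop pragma, surfaced to the vectoriser via
//    LoopVectorizeHints::getPredicate():
void saxpy(float *y, const float *x, float a, int n) {
#pragma clang loop vectorize_predicate(enable)
  for (int i = 0; i < n; ++i)
    y[i] += a * x[i];
}
//
// 3) The new target hook added in this patch, queried by the vectoriser:
//      TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, LAI)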
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/TargetTransformInfo.cpp        |  6
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp   | 44
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.h     |  7
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp  | 18
4 files changed, 70 insertions, 5 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index ba89a9eebdb..0b409840351 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -243,6 +243,12 @@ bool TargetTransformInfo::isHardwareLoopProfitable(
   return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
 }
 
+bool TargetTransformInfo::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+    ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI,
+    DominatorTree *DT, const LoopAccessInfo *LAI) const {
+  return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
+}
+
 void TargetTransformInfo::getUnrollingPreferences(
     Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
   return TTIImpl->getUnrollingPreferences(L, SE, UP);
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index ed1d6e5ca36..eb698375985 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1000,6 +1000,50 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
   return true;
 }
 
+bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+                                             ScalarEvolution &SE,
+                                             AssumptionCache &AC,
+                                             TargetLibraryInfo *TLI,
+                                             DominatorTree *DT,
+                                             const LoopAccessInfo *LAI) {
+  // Creating a predicated vector loop is the first step for generating a
+  // tail-predicated hardware loop, for which we need the MVE masked
+  // load/stores instructions:
+  if (!ST->hasMVEIntegerOps())
+    return false;
+
+  HardwareLoopInfo HWLoopInfo(L);
+  if (!HWLoopInfo.canAnalyze(*LI)) {
+    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
+                         "analyzable.\n");
+    return false;
+  }
+
+  // This checks if we have the low-overhead branch architecture
+  // extension, and if we will create a hardware-loop:
+  if (!isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
+    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
+                         "profitable.\n");
+    return false;
+  }
+
+  if (!HWLoopInfo.isHardwareLoopCandidate(SE, *LI, *DT)) {
+    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
+                         "a candidate.\n");
+    return false;
+  }
+
+  // TODO: to set up a tail-predicated loop, which works by setting up
+  // the total number of elements processed by the loop, we need to
+  // determine the element size here, and if it is uniform for all operations
+  // in the vector loop. This means we will reject narrowing/widening
+  // operations, and don't want to predicate the vector loop, which is
+  // the main prep step for tail-predicated loops.
+
+  return false;
+}
+
+
 void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP) {
   // Only currently enable these preferences for M-Class cores.
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index c4e1a17d80c..5bb3bcaf10e 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -203,7 +203,12 @@ public:
                                 AssumptionCache &AC,
                                 TargetLibraryInfo *LibInfo,
                                 HardwareLoopInfo &HWLoopInfo);
-
+  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+                                   ScalarEvolution &SE,
+                                   AssumptionCache &AC,
+                                   TargetLibraryInfo *TLI,
+                                   DominatorTree *DT,
+                                   const LoopAccessInfo *LAI);
 
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9b6223cbbdc..f10f0f3320d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7423,13 +7423,18 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
 
 static ScalarEpilogueLowering
 getScalarEpilogueLowering(Function *F, Loop *L, LoopVectorizeHints &Hints,
-                          ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+                          ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
+                          TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
+                          AssumptionCache *AC, LoopInfo *LI,
+                          ScalarEvolution *SE, DominatorTree *DT,
+                          const LoopAccessInfo *LAI) {
   ScalarEpilogueLowering SEL = CM_ScalarEpilogueAllowed;
   if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
       (F->hasOptSize() ||
        llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)))
     SEL = CM_ScalarEpilogueNotAllowedOptSize;
-  else if (PreferPredicateOverEpilog || Hints.getPredicate())
+  else if (PreferPredicateOverEpilog || Hints.getPredicate() ||
+           TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, LAI))
     SEL = CM_ScalarEpilogueNotNeededUsePredicate;
 
   return SEL;
@@ -7449,7 +7454,10 @@ static bool processLoopInVPlanNativePath(
   assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
   Function *F = L->getHeader()->getParent();
   InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI());
-  ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI);
+
+  ScalarEpilogueLowering SEL =
+      getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, AC, LI,
+                                PSE.getSE(), DT, LVL->getLAI());
 
   LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F,
                                 &Hints, IAI);
@@ -7541,7 +7549,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
 
   // Check the function attributes and profiles to find out if this function
   // should be optimized for size.
-  ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI);
+  ScalarEpilogueLowering SEL =
+      getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, AC, LI,
+                                PSE.getSE(), DT, LVL.getLAI());
 
   // Entrance to the VPlan-native vectorization path. Outer loops are processed
   // here. They may require CFG and instruction level transformations before
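Beyond the ARM MVE stub above, another target would opt in by overriding the
same hook in its own TTI implementation. A rough, hypothetical sketch follows;
the "Foo" target, the HasMaskedMemOps flag, and the omitted boilerplate are
all made up for illustration, and only the hook's signature comes from this
commit:

#include "llvm/CodeGen/BasicTTIImpl.h"

using namespace llvm;

class FooTTIImpl : public BasicTTIImplBase<FooTTIImpl> {
  // ... constructor, subtarget and TLI plumbing omitted for brevity ...
public:
  // Called from getScalarEpilogueLowering() in LoopVectorize.cpp; returning
  // true selects CM_ScalarEpilogueNotNeededUsePredicate, i.e. a predicated
  // vector body instead of a scalar epilogue.
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) {
    // Made-up policy: only prefer predication when the subtarget can fold
    // the loop tail with masked loads/stores.
    return HasMaskedMemOps;
  }

private:
  bool HasMaskedMemOps = false; // stand-in for a real subtarget feature query
};

Targets that do not override the hook keep the default implementation (added
in the include/ part of this patch, not shown in this llvm/lib diffstat),
which returns false, so existing behaviour is unchanged; as the commit
message notes, this is a non-functional change for now.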