[LV] Fold-tail flag

This is the compiler-flag equivalent of the Predicate pragma (https://reviews.llvm.org/D65197), to direct the vectorizer to fold the remainder-loop into the main-loop using predication. Differential Revision: https://reviews.llvm.org/D66108 Reviewers: Ayal, hsaito, fhahn, SjoerdMeijer llvm-svn: 368801
author: Dorit Nuzman <dorit.nuzman@intel.com> 2019-08-14 05:22:20 +0000
committer: Dorit Nuzman <dorit.nuzman@intel.com> 2019-08-14 05:22:20 +0000
commit: 491ca2425d4a58bf1d732c5a051e5714a23ca198 (patch)
tree: 462abb94f4317c583b2be1399db2f29818f9aae7 /llvm/lib/Transforms
parent: d4edd9d97e4e9200d885297c79b7e7b55961dae8 (diff)
download: bcm5719-llvm-491ca2425d4a58bf1d732c5a051e5714a23ca198.tar.gz
bcm5719-llvm-491ca2425d4a58bf1d732c5a051e5714a23ca198.zip
1 files changed, 13 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1f8cbf7f340..20305416079 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -177,6 +177,14 @@ static cl::opt<unsigned> TinyTripCountVectorThreshold(
              "value are vectorized only if no scalar iteration overheads "
              "are incurred."));
 
+// Indicates that an epilogue is undesired, predication is preferred.
+// This means that the vectorizer will try to fold the loop-tail (epilogue)
+// into the loop and predicate the loop body accordingly.
+static cl::opt<bool> PreferPredicateOverEpilog(
+    "prefer-predicate-over-epilog", cl::init(false), cl::Hidden,
+    cl::desc("Indicate that an epilogue is undesired, predication should be "
+             "used instead."));
+
 static cl::opt<bool> MaximizeBandwidth(
     "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
     cl::desc("Maximize bandwidth when selecting vectorization factor which "
@@ -906,7 +914,7 @@ enum ScalarEpilogueLowering {
   CM_ScalarEpilogueNotAllowedLowTripLoop,
 
   // Loop hint predicate indicating an epilogue is undesired.
-  CM_ScalarEpilogueNotNeededPredicatePragma
+  CM_ScalarEpilogueNotNeededUsePredicate
 };
 
 /// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -4804,9 +4812,9 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF() {
   switch (ScalarEpilogueStatus) {
   case CM_ScalarEpilogueAllowed:
     return computeFeasibleMaxVF(TC);
-  case CM_ScalarEpilogueNotNeededPredicatePragma:
+  case CM_ScalarEpilogueNotNeededUsePredicate:
     LLVM_DEBUG(
-        dbgs() << "LV: vector predicate hint found.\n"
+        dbgs() << "LV: vector predicate hint/switch found.\n"
                << "LV: Not allowing scalar epilogue, creating predicated "
                << "vector loop.\n");
     break;
@@ -7298,8 +7306,8 @@ getScalarEpilogueLowering(Function *F, Loop *L, LoopVectorizeHints &Hints,
       (F->hasOptSize() ||
        llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)))
     SEL = CM_ScalarEpilogueNotAllowedOptSize;
-  else if (Hints.getPredicate())
-    SEL = CM_ScalarEpilogueNotNeededPredicatePragma;
+  else if (PreferPredicateOverEpilog || Hints.getPredicate()) 
+    SEL = CM_ScalarEpilogueNotNeededUsePredicate;
 
   return SEL;
 }
author	Dorit Nuzman <dorit.nuzman@intel.com>	2019-08-14 05:22:20 +0000
committer	Dorit Nuzman <dorit.nuzman@intel.com>	2019-08-14 05:22:20 +0000
commit	491ca2425d4a58bf1d732c5a051e5714a23ca198 (patch)
tree	462abb94f4317c583b2be1399db2f29818f9aae7 /llvm/lib/Transforms
parent	d4edd9d97e4e9200d885297c79b7e7b55961dae8 (diff)
download	bcm5719-llvm-491ca2425d4a58bf1d732c5a051e5714a23ca198.tar.gz bcm5719-llvm-491ca2425d4a58bf1d732c5a051e5714a23ca198.zip