[IAI,LV] Avoid creating a scalar epilogue due to gaps in interleave-groups when optimizing for size

LV is careful to respect -Os and not to create a scalar epilogue in all cases (runtime tests, trip-counts that require a remainder loop) except for peeling due to gaps in interleave-groups. This patch fixes that; with -Os we now invalidate such interleave-groups and vectorize without an epilogue.

The patch also removes a related FIXME comment that is now obsolete, and was also inaccurate: "FIXME: return None if loop requiresScalarEpilog(<MaxVF>), or look for a smaller MaxVF that does not require a scalar epilog." (requiresScalarEpilog() has nothing to do with VF).

Reviewers: Ayal, hsaito, dcaballe, fhahn
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D53420
llvm-svn: 344883
author: Dorit Nuzman <dorit.nuzman@intel.com> 2018-10-22 06:17:09 +0000
committer: Dorit Nuzman <dorit.nuzman@intel.com> 2018-10-22 06:17:09 +0000
commit: 3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef (patch)
tree: 611aadca17d90c373ca52fd74c2c9300313ac141 /llvm/lib
parent: 2336dc3c51c8883a1ef171a4236c448b54f6993c (diff)
download: bcm5719-llvm-3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef.tar.gz
bcm5719-llvm-3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef.zip
2 files changed, 32 insertions, 2 deletions
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 5fd6fe0ef31..8b6702c8544 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -919,3 +919,27 @@ void InterleavedAccessInfo::analyzeInterleaving(
     }
   }
 }
+
+void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
+  // Releases every interleave group that requires a scalar epilogue. If no
+  // group set the analysis-wide requirement flag, there is nothing to do.
+  if (!requiresScalarEpilogue())
+    return;
+
+  // Collect the groups in a set first so that no Group is released twice.
+  SmallPtrSet<InterleaveGroup *, 4> DelSet;
+  for (auto &I : InterleaveGroupMap) {
+    InterleaveGroup *Group = I.second;
+    if (Group->requiresScalarEpilogue())
+      DelSet.insert(Group);
+  }
+  for (auto *Ptr : DelSet) {
+    LLVM_DEBUG(
+        dbgs() 
+        << "LV: Invalidate candidate interleaved group due to gaps that "
+           "require a scalar epilogue.\n");
+    releaseGroup(Ptr);
+  }
+
+  RequiresScalarEpilogue = false; // No remaining group needs an epilogue.
+}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a395183398d..daaa1e27c8e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4599,6 +4599,14 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
     return None;
   }
 
+  // Record that scalar epilogue is not allowed.
+  LLVM_DEBUG(dbgs() << "LV: Not inserting scalar epilogue for access with gaps "
+                       "due to -Os/-Oz.\n");
+
+  // We don't create an epilogue when optimizing for size.
+  // Invalidate interleave groups that require an epilogue.
+  InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
+
   unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC);
 
   if (TC > 0 && TC % MaxVF == 0) {
@@ -4610,8 +4618,6 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
   // found modulo the vectorization factor is not zero, try to fold the tail
   // by masking.
   // FIXME: look for a smaller MaxVF that does divide TC rather than masking.
-  // FIXME: return None if loop requiresScalarEpilog(<MaxVF>), or look for a
-  //        smaller MaxVF that does not require a scalar epilog.
   if (Legal->canFoldTailByMasking()) {
     FoldTailByMasking = true;
     return MaxVF;
author	Dorit Nuzman <dorit.nuzman@intel.com>	2018-10-22 06:17:09 +0000
committer	Dorit Nuzman <dorit.nuzman@intel.com>	2018-10-22 06:17:09 +0000
commit	3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef (patch)
tree	611aadca17d90c373ca52fd74c2c9300313ac141 /llvm/lib
parent	2336dc3c51c8883a1ef171a4236c448b54f6993c (diff)
download	bcm5719-llvm-3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef.tar.gz bcm5719-llvm-3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef.zip