summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Dorit Nuzman <dorit.nuzman@intel.com> 2018-10-22 06:17:09 +0000
committer: Dorit Nuzman <dorit.nuzman@intel.com> 2018-10-22 06:17:09 +0000
commit: 3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef (patch)
tree: 611aadca17d90c373ca52fd74c2c9300313ac141 /llvm/lib
parent: 2336dc3c51c8883a1ef171a4236c448b54f6993c (diff)
download: bcm5719-llvm-3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef.tar.gz
download: bcm5719-llvm-3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef.zip
[IAI,LV] Avoid creating a scalar epilogue due to gaps in interleave-groups when
optimizing for size

LV is careful to respect -Os and not to create a scalar epilog in all cases
(runtime tests, trip-counts that require a remainder loop) except for peeling
due to gaps in interleave-groups. This patch fixes that; -Os will now have us
invalidate such interleave-groups and vectorize without an epilog.

The patch also removes a related FIXME comment that is now obsolete, and was
also inaccurate:
"FIXME: return None if loop requiresScalarEpilog(<MaxVF>), or look for a smaller
MaxVF that does not require a scalar epilog."
(requiresScalarEpilog() has nothing to do with VF).

Reviewers: Ayal, hsaito, dcaballe, fhahn

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D53420

llvm-svn: 344883
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Analysis/VectorUtils.cpp 24
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 10
2 files changed, 32 insertions, 2 deletions
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 5fd6fe0ef31..8b6702c8544 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -919,3 +919,27 @@ void InterleavedAccessInfo::analyzeInterleaving(
}
}
}
+
+void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
+ // Releases all groups that need a scalar epilogue and clears the flag. If no
+ // group had triggered the requirement for an epilogue loop, nothing to do.
+ if (!requiresScalarEpilogue())
+ return;
+
+ // Avoid releasing a Group twice: the map may yield one Group more than once.
+ SmallPtrSet<InterleaveGroup *, 4> DelSet;
+ for (auto &I : InterleaveGroupMap) {
+ InterleaveGroup *Group = I.second;
+ if (Group->requiresScalarEpilogue())
+ DelSet.insert(Group);
+ }
+ for (auto *Ptr : DelSet) { // Each distinct collected group is released once.
+ LLVM_DEBUG(
+ dbgs()
+ << "LV: Invalidate candidate interleaved group due to gaps that "
+ "require a scalar epilogue.\n");
+ releaseGroup(Ptr);
+ }
+ // Every group that reported needing an epilogue was released above.
+ RequiresScalarEpilogue = false;
+}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a395183398d..daaa1e27c8e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4599,6 +4599,14 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
return None;
}
+ // Record that scalar epilogue is not allowed.
+ LLVM_DEBUG(dbgs() << "LV: Not inserting scalar epilogue for access with gaps "
+ "due to -Os/-Oz.\n");
+
+ // We don't create an epilogue when optimizing for size.
+ // Invalidate interleave groups that require an epilogue.
+ InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
+
unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC);
if (TC > 0 && TC % MaxVF == 0) {
@@ -4610,8 +4618,6 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) {
// found modulo the vectorization factor is not zero, try to fold the tail
// by masking.
// FIXME: look for a smaller MaxVF that does divide TC rather than masking.
- // FIXME: return None if loop requiresScalarEpilog(<MaxVF>), or look for a
- // smaller MaxVF that does not require a scalar epilog.
if (Legal->canFoldTailByMasking()) {
FoldTailByMasking = true;
return MaxVF;
OpenPOWER on IntegriCloud