diff options
| author | Dorit Nuzman <dorit.nuzman@intel.com> | 2018-10-22 06:17:09 +0000 |
|---|---|---|
| committer | Dorit Nuzman <dorit.nuzman@intel.com> | 2018-10-22 06:17:09 +0000 |
| commit | 3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef (patch) | |
| tree | 611aadca17d90c373ca52fd74c2c9300313ac141 /llvm/lib | |
| parent | 2336dc3c51c8883a1ef171a4236c448b54f6993c (diff) | |
| download | bcm5719-llvm-3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef.tar.gz bcm5719-llvm-3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef.zip | |
[IAI,LV] Avoid creating a scalar epilogue due to gaps in interleave-groups when
optimizing for size
When optimizing for size (-Os), LV already avoids creating a scalar epilogue in
every case (runtime tests, trip-counts that require a remainder loop) except
one: peeling due to gaps in interleave-groups. This patch fixes that; under -Os
LV now invalidates such interleave-groups and vectorizes without an epilogue.
The patch also removes a related FIXME comment that is now obsolete and was
inaccurate to begin with:
"FIXME: return None if loop requiresScalarEpilog(<MaxVF>), or look for a smaller
MaxVF that does not require a scalar epilog."
(requiresScalarEpilogue() does not depend on VF).
Reviewers: Ayal, hsaito, dcaballe, fhahn
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D53420
llvm-svn: 344883
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Analysis/VectorUtils.cpp | 24 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 |
2 files changed, 32 insertions, 2 deletions
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 5fd6fe0ef31..8b6702c8544 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -919,3 +919,27 @@ void InterleavedAccessInfo::analyzeInterleaving( } } } + +void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() { + // If no group had triggered the requirement to create an epilogue loop, + // there is nothing to do. + if (!requiresScalarEpilogue()) + return; + + // Avoid releasing a Group twice. + SmallPtrSet<InterleaveGroup *, 4> DelSet; + for (auto &I : InterleaveGroupMap) { + InterleaveGroup *Group = I.second; + if (Group->requiresScalarEpilogue()) + DelSet.insert(Group); + } + for (auto *Ptr : DelSet) { + LLVM_DEBUG( + dbgs() + << "LV: Invalidate candidate interleaved group due to gaps that " + "require a scalar epilogue.\n"); + releaseGroup(Ptr); + } + + RequiresScalarEpilogue = false; +} diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index a395183398d..daaa1e27c8e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4599,6 +4599,14 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { return None; } + // Record that scalar epilogue is not allowed. + LLVM_DEBUG(dbgs() << "LV: Not inserting scalar epilogue for access with gaps " + "due to -Os/-Oz.\n"); + + // We don't create an epilogue when optimizing for size. + // Invalidate interleave groups that require an epilogue. + InterleaveInfo.invalidateGroupsRequiringScalarEpilogue(); + unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC); if (TC > 0 && TC % MaxVF == 0) { @@ -4610,8 +4618,6 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { // found modulo the vectorization factor is not zero, try to fold the tail // by masking. 
// FIXME: look for a smaller MaxVF that does divide TC rather than masking. - // FIXME: return None if loop requiresScalarEpilog(<MaxVF>), or look for a - // smaller MaxVF that does not require a scalar epilog. if (Legal->canFoldTailByMasking()) { FoldTailByMasking = true; return MaxVF; |

