From 3ec99fe21bbc44d0a3ff898644f71aa2e1e8d6ef Mon Sep 17 00:00:00 2001 From: Dorit Nuzman Date: Mon, 22 Oct 2018 06:17:09 +0000 Subject: [IAI,LV] Avoid creating a scalar epilogue due to gaps in interleave-groups when optimizing for size LV is careful to respect -Os and not to create a scalar epilog in all cases (runtime tests, trip-counts that require a remainder loop) except for peeling due to gaps in interleave-groups. This patch fixes that; -Os will now have us invalidate such interleave-groups and vectorize without an epilog. The patch also removes a related FIXME comment that is now obsolete, and was also inaccurate: "FIXME: return None if loop requiresScalarEpilog(), or look for a smaller MaxVF that does not require a scalar epilog." (requiresScalarEpilog() has nothing to do with VF). Reviewers: Ayal, hsaito, dcaballe, fhahn Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D53420 llvm-svn: 344883 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'llvm/lib/Transforms/Vectorize') diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index a395183398d..daaa1e27c8e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4599,6 +4599,14 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { return None; } + // Record that scalar epilogue is not allowed. + LLVM_DEBUG(dbgs() << "LV: Not inserting scalar epilogue for access with gaps " + "due to -Os/-Oz.\n"); + + // We don't create an epilogue when optimizing for size. + // Invalidate interleave groups that require an epilogue. 
+ InterleaveInfo.invalidateGroupsRequiringScalarEpilogue(); + unsigned MaxVF = computeFeasibleMaxVF(OptForSize, TC); if (TC > 0 && TC % MaxVF == 0) { @@ -4610,8 +4618,6 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF(bool OptForSize) { // found modulo the vectorization factor is not zero, try to fold the tail // by masking. // FIXME: look for a smaller MaxVF that does divide TC rather than masking. - // FIXME: return None if loop requiresScalarEpilog(), or look for a - // smaller MaxVF that does not require a scalar epilog. if (Legal->canFoldTailByMasking()) { FoldTailByMasking = true; return MaxVF; -- cgit v1.2.3