summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
diff options
context:
space:
mode:
author Ayal Zaks <ayal.zaks@intel.com> 2018-10-18 15:03:15 +0000
committer Ayal Zaks <ayal.zaks@intel.com> 2018-10-18 15:03:15 +0000
commit b0b5312e677ccbe568ffe4ea8247c4384d30b000 (patch)
tree 8842218ea6576623d45b67fcf7125a660841b436 /llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
parent a1e6e65b9fd68490db04530a45c9333cf69b6213 (diff)
download bcm5719-llvm-b0b5312e677ccbe568ffe4ea8247c4384d30b000.tar.gz
bcm5719-llvm-b0b5312e677ccbe568ffe4ea8247c4384d30b000.zip
[LV] Fold tail by masking to vectorize loops of arbitrary trip count under opt for size
When optimizing for size, a loop is vectorized only if the resulting vector loop completely replaces the original scalar loop. This holds if no runtime guards are needed, if the original trip-count TC does not overflow, and if TC is a known constant that is a multiple of the VF. The last two TC-related conditions can be overcome by 1. rounding the trip-count of the vector loop up from TC to a multiple of VF; 2. masking the vector body under a newly introduced "if (i <= TC-1)" condition. The patch allows loops with arbitrary trip counts to be vectorized under -Os, subject to the existing cost model considerations. It also applies to loops with small trip counts (under -O2) which are currently handled as if under -Os. The patch does not handle loops with reductions, live-outs, or w/o a primary induction variable, and disallows interleave groups. (Third, final and main part of -) Differential Revision: https://reviews.llvm.org/D50480 llvm-svn: 344743
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp')
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp 55
1 files changed, 55 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index bde90a71b41..755ad32a7bf 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1134,4 +1134,59 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
return Result;
}
+bool LoopVectorizationLegality::canFoldTailByMasking() {
+ // Returns true only if all checks below pass; each failure emits a remark.
+ LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
+ // Check 1: a primary induction variable must have been recorded.
+ if (!PrimaryInduction) {
+ ORE->emit(createMissedAnalysis("NoPrimaryInduction")
+ << "Missing a primary induction variable in the loop, which is "
+ << "needed in order to fold tail by masking as required.");
+ LLVM_DEBUG(dbgs() << "LV: No primary induction, cannot fold tail by "
+ << "masking.\n");
+ return false;
+ }
+ // Check 2: any recorded reduction disqualifies the loop for now.
+ // TODO: handle reductions when tail is folded by masking.
+ if (!Reductions.empty()) {
+ ORE->emit(createMissedAnalysis("ReductionFoldingTailByMasking")
+ << "Cannot fold tail by masking in the presence of reductions.");
+ LLVM_DEBUG(dbgs() << "LV: Loop has reductions, cannot fold tail by "
+ << "masking.\n");
+ return false;
+ }
+ // Check 3: no allowed-exit value may have a user outside the loop.
+ // TODO: handle outside users when tail is folded by masking.
+ for (auto *AE : AllowedExit) {
+ // Check that all users of allowed exit values are inside the loop.
+ for (User *U : AE->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ if (TheLoop->contains(UI))
+ continue; // In-loop user: not a live-out, keep scanning.
+ ORE->emit(createMissedAnalysis("LiveOutFoldingTailByMasking")
+ << "Cannot fold tail by masking in the presence of live outs.");
+ LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop has an "
+ << "outside user for : " << *UI << '\n');
+ return false; // First outside user found: reject immediately.
+ }
+ }
+ // Check 4: every block of the loop must be predicable.
+ // The list of pointers that we can safely read and write to remains empty.
+ SmallPtrSet<Value *, 8> SafePointers;
+
+ // Check and mark all blocks for predication, including those that ordinarily
+ // do not need predication such as the header block.
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ if (!blockCanBePredicated(BB, SafePointers)) {
+ ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
+ << "control flow cannot be substituted for a select");
+ LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as required.\n");
+ return false;
+ }
+ }
+ // No check rejected the loop.
+ LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
+ return true;
+}
+
} // namespace llvm
OpenPOWER on IntegriCloud