summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorSjoerd Meijer <sjoerd.meijer@arm.com>2020-01-09 09:14:00 +0000
committerSjoerd Meijer <sjoerd.meijer@arm.com>2020-01-09 09:14:00 +0000
commit8f1887456ab4ba24a62ccb19d0d04b08972a0289 (patch)
tree3a92e6feac6510155c993b9550c913d57860f0fc /llvm/lib
parent08778d8c4fd8a6519c7f27bfa6b09c47262cb844 (diff)
downloadbcm5719-llvm-8f1887456ab4ba24a62ccb19d0d04b08972a0289.tar.gz
bcm5719-llvm-8f1887456ab4ba24a62ccb19d0d04b08972a0289.zip
[LV] Still vectorise when tail-folding can't find a primary inducation variable
This addresses a vectorisation regression for tail-folded loops that are counting down, e.g. loops as simple as this: void foo(char *A, char *B, char *C, uint32_t N) { while (N > 0) { *C++ = *A++ + *B++; N--; } } These are loops that can be vectorised, but when tail-folding is requested, it can't find a primary induction variable which we do need for predicating the loop. As a result, the loop isn't vectorised at all, which it is able to do when tail-folding is not attempted. So, this adds a check for the primary induction variable where we decide how to lower the scalar epilogue. I.e., when there isn't a primary induction variable, a scalar epilogue loop is allowed (i.e. don't request tail-folding) so that vectorisation could still be triggered. Having this check for the primary induction variable make sense anyway, and in addition, in a follow-up of this I will look into discovering earlier the primary induction variable for counting down loops, so that this can also be tail-folded. Differential revision: https://reviews.llvm.org/D72324
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp63
1 files changed, 37 insertions, 26 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index fd30d52a562..0400e44dd0e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7502,30 +7502,43 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
}
-static ScalarEpilogueLowering
-getScalarEpilogueLowering(Function *F, Loop *L, LoopVectorizeHints &Hints,
- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
- TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
- AssumptionCache *AC, LoopInfo *LI,
- ScalarEvolution *SE, DominatorTree *DT,
- const LoopAccessInfo *LAI) {
- ScalarEpilogueLowering SEL = CM_ScalarEpilogueAllowed;
+// Determine how to lower the scalar epilogue, which depends on 1) optimising
+// for minimum code-size, 2) predicate compiler options, 3) loop hints forcing
+// predication, and 4) a TTI hook that analyses whether the loop is suitable
+// for predication.
+static ScalarEpilogueLowering getScalarEpilogueLowering(
+ Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
+ AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
+ LoopVectorizationLegality &LVL) {
+ bool OptSize =
+ F->hasOptSize() || llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
+ PGSOQueryType::IRPass);
+ // 1) OptSize takes precedence over all other options, i.e. if this is set,
+ // don't look at hints or options, and don't request a scalar epilogue.
+ if (OptSize && Hints.getForce() != LoopVectorizeHints::FK_Enabled)
+ return CM_ScalarEpilogueNotAllowedOptSize;
+
bool PredicateOptDisabled = PreferPredicateOverEpilog.getNumOccurrences() &&
!PreferPredicateOverEpilog;
- if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
- (F->hasOptSize() ||
- llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
- PGSOQueryType::IRPass)))
- SEL = CM_ScalarEpilogueNotAllowedOptSize;
- else if (PreferPredicateOverEpilog ||
- Hints.getPredicate() == LoopVectorizeHints::FK_Enabled ||
- (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, LAI) &&
- Hints.getPredicate() != LoopVectorizeHints::FK_Disabled &&
- !PredicateOptDisabled))
- SEL = CM_ScalarEpilogueNotNeededUsePredicate;
+ // 2) Next, if disabling predication is requested on the command line, honour
+ // this and request a scalar epilogue. Also do this if we don't have a
+ // primary induction variable, which is required for predication.
+ if (PredicateOptDisabled || !LVL.getPrimaryInduction())
+ return CM_ScalarEpilogueAllowed;
+
+ // 3) and 4) look if enabling predication is requested on the command line,
+ // with a loop hint, or if the TTI hook indicates this is profitable, request
+ // predication .
+ if (PreferPredicateOverEpilog ||
+ Hints.getPredicate() == LoopVectorizeHints::FK_Enabled ||
+ (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT,
+ LVL.getLAI()) &&
+ Hints.getPredicate() != LoopVectorizeHints::FK_Disabled))
+ return CM_ScalarEpilogueNotNeededUsePredicate;
- return SEL;
+ return CM_ScalarEpilogueAllowed;
}
// Process the loop in the VPlan-native vectorization path. This path builds
@@ -7543,9 +7556,8 @@ static bool processLoopInVPlanNativePath(
Function *F = L->getHeader()->getParent();
InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI());
- ScalarEpilogueLowering SEL =
- getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, AC, LI,
- PSE.getSE(), DT, LVL->getLAI());
+ ScalarEpilogueLowering SEL = getScalarEpilogueLowering(
+ F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, *LVL);
LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F,
&Hints, IAI);
@@ -7637,9 +7649,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check the function attributes and profiles to find out if this function
// should be optimized for size.
- ScalarEpilogueLowering SEL =
- getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, AC, LI,
- PSE.getSE(), DT, LVL.getLAI());
+ ScalarEpilogueLowering SEL = getScalarEpilogueLowering(
+ F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, LVL);
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
OpenPOWER on IntegriCloud