author     Sjoerd Meijer <sjoerd.meijer@arm.com>    2019-11-06 09:58:36 +0000
committer  Sjoerd Meijer <sjoerd.meijer@arm.com>    2019-11-06 10:14:20 +0000
commit     6c2a4f5ff93e16c3b86c18543e02a193ced2d956 (patch)
tree       d2cb3ef09bdcc0e963f75f4dd5cfa58705d450da /llvm/lib
parent     9577ee84e638530be7a310c9d50526a36e3c212e (diff)
download   bcm5719-llvm-6c2a4f5ff93e16c3b86c18543e02a193ced2d956.tar.gz
           bcm5719-llvm-6c2a4f5ff93e16c3b86c18543e02a193ced2d956.zip
[TTI][LV] preferPredicateOverEpilogue
We have two ways to steer creating a predicated vector body instead of a
scalar epilogue: to force this, we have 1) a command line option and 2) a
pragma. This adds a third: a target hook in TargetTransformInfo that can be
queried as to whether predication is preferred or not, which allows the
vectoriser to make the decision without it being forced by the user.
While this behaves as a non-functional change for now, it shows the required
TTI plumbing, the use of this new hook in the vectoriser, and the beginning of
an ARM MVE implementation. I will follow up on this with:
- a complete MVE implementation, see D69845;
- a patch to allow disabling this, i.e. we should respect
  "vector_predicate(disable)" and its corresponding loop hint.
Differential Revision: https://reviews.llvm.org/D69040
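For reference, a minimal sketch of the three ways to request predication that
the message above mentions. The command-line spelling is inferred from the
PreferPredicateOverEpilog variable used in the diff below, and the pragma is
the clang loop-hint form; both are given here as assumptions rather than
quoted from this commit:

// 1) Command line: assumed LoopVectorize option backing the
//    PreferPredicateOverEpilog flag tested in getScalarEpilogueLowering():
//      opt -loop-vectorize -prefer-predicate-over-epilog ...
//
// 2) Per-loop pragma, surfaced to the vectoriser via
//    LoopVectorizeHints::getPredicate():
void saxpy(float *y, const float *x, float a, int n) {
#pragma clang loop vectorize_predicate(enable)
  for (int i = 0; i < n; ++i)
    y[i] += a * x[i];
}
//
// 3) The new target hook added in this patch, queried by the vectoriser:
//      TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, LAI)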
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Analysis/TargetTransformInfo.cpp        |  6
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp   | 44
-rw-r--r--  llvm/lib/Target/ARM/ARMTargetTransformInfo.h     |  7
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp  | 18
4 files changed, 70 insertions, 5 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index ba89a9eebdb..0b409840351 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -243,6 +243,12 @@ bool TargetTransformInfo::isHardwareLoopProfitable(
   return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
 }
 
+bool TargetTransformInfo::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+    ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI,
+    DominatorTree *DT, const LoopAccessInfo *LAI) const {
+  return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
+}
+
 void TargetTransformInfo::getUnrollingPreferences(
     Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
   return TTIImpl->getUnrollingPreferences(L, SE, UP);
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index ed1d6e5ca36..eb698375985 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1000,6 +1000,50 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
   return true;
 }
 
+bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+                                             ScalarEvolution &SE,
+                                             AssumptionCache &AC,
+                                             TargetLibraryInfo *TLI,
+                                             DominatorTree *DT,
+                                             const LoopAccessInfo *LAI) {
+  // Creating a predicated vector loop is the first step for generating a
+  // tail-predicated hardware loop, for which we need the MVE masked
+  // load/stores instructions:
+  if (!ST->hasMVEIntegerOps())
+    return false;
+
+  HardwareLoopInfo HWLoopInfo(L);
+  if (!HWLoopInfo.canAnalyze(*LI)) {
+    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
+                         "analyzable.\n");
+    return false;
+  }
+
+  // This checks if we have the low-overhead branch architecture
+  // extension, and if we will create a hardware-loop:
+  if (!isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
+    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
+                         "profitable.\n");
+    return false;
+  }
+
+  if (!HWLoopInfo.isHardwareLoopCandidate(SE, *LI, *DT)) {
+    LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
+                         "a candidate.\n");
+    return false;
+  }
+
+  // TODO: to set up a tail-predicated loop, which works by setting up
+  // the total number of elements processed by the loop, we need to
+  // determine the element size here, and if it is uniform for all operations
+  // in the vector loop. This means we will reject narrowing/widening
+  // operations, and don't want to predicate the vector loop, which is
+  // the main prep step for tail-predicated loops.
+
+  return false;
+}
+
+
 void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                          TTI::UnrollingPreferences &UP) {
   // Only currently enable these preferences for M-Class cores.
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index c4e1a17d80c..5bb3bcaf10e 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -203,7 +203,12 @@ public:
                                 AssumptionCache &AC,
                                 TargetLibraryInfo *LibInfo,
                                 HardwareLoopInfo &HWLoopInfo);
-
+  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+                                   ScalarEvolution &SE,
+                                   AssumptionCache &AC,
+                                   TargetLibraryInfo *TLI,
+                                   DominatorTree *DT,
+                                   const LoopAccessInfo *LAI);
 
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9b6223cbbdc..f10f0f3320d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7423,13 +7423,18 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
 
 static ScalarEpilogueLowering
 getScalarEpilogueLowering(Function *F, Loop *L, LoopVectorizeHints &Hints,
-                          ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+                          ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
+                          TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
+                          AssumptionCache *AC, LoopInfo *LI,
+                          ScalarEvolution *SE, DominatorTree *DT,
+                          const LoopAccessInfo *LAI) {
   ScalarEpilogueLowering SEL = CM_ScalarEpilogueAllowed;
   if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
       (F->hasOptSize() ||
        llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)))
     SEL = CM_ScalarEpilogueNotAllowedOptSize;
-  else if (PreferPredicateOverEpilog || Hints.getPredicate())
+  else if (PreferPredicateOverEpilog || Hints.getPredicate() ||
+           TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, LAI))
     SEL = CM_ScalarEpilogueNotNeededUsePredicate;
 
   return SEL;
@@ -7449,7 +7454,10 @@ static bool processLoopInVPlanNativePath(
   assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
   Function *F = L->getHeader()->getParent();
   InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI());
-  ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI);
+
+  ScalarEpilogueLowering SEL =
+      getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, AC, LI,
+                                PSE.getSE(), DT, LVL->getLAI());
 
   LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F,
                                 &Hints, IAI);
@@ -7541,7 +7549,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
 
   // Check the function attributes and profiles to find out if this function
   // should be optimized for size.
-  ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI);
+  ScalarEpilogueLowering SEL =
+      getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, AC, LI,
+                                PSE.getSE(), DT, LVL.getLAI());
 
   // Entrance to the VPlan-native vectorization path. Outer loops are processed
   // here. They may require CFG and instruction level transformations before
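Beyond the ARM MVE stub above, another target would opt in by overriding the
same hook in its own TTI implementation. A rough, hypothetical sketch follows;
the "Foo" target, the HasMaskedMemOps flag, and the omitted boilerplate are
all made up for illustration, and only the hook's signature comes from this
commit:

#include "llvm/CodeGen/BasicTTIImpl.h"

using namespace llvm;

class FooTTIImpl : public BasicTTIImplBase<FooTTIImpl> {
  // ... constructor, subtarget and TLI plumbing omitted for brevity ...
public:
  // Called from getScalarEpilogueLowering() in LoopVectorize.cpp; returning
  // true selects CM_ScalarEpilogueNotNeededUsePredicate, i.e. a predicated
  // vector body instead of a scalar epilogue.
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) {
    // Made-up policy: only prefer predication when the subtarget can fold
    // the loop tail with masked loads/stores.
    return HasMaskedMemOps;
  }

private:
  bool HasMaskedMemOps = false; // stand-in for a real subtarget feature query
};

Targets that do not override the hook keep the default implementation (added
in the include/ part of this patch, not shown in this llvm/lib diffstat),
which returns false, so existing behaviour is unchanged; as the commit
message notes, this is a non-functional change for now.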