summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author Sjoerd Meijer <sjoerd.meijer@arm.com> 2019-11-06 09:58:36 +0000
committer Sjoerd Meijer <sjoerd.meijer@arm.com> 2019-11-06 10:14:20 +0000
commit 6c2a4f5ff93e16c3b86c18543e02a193ced2d956 (patch)
tree d2cb3ef09bdcc0e963f75f4dd5cfa58705d450da /llvm/lib
parent 9577ee84e638530be7a310c9d50526a36e3c212e (diff)
download bcm5719-llvm-6c2a4f5ff93e16c3b86c18543e02a193ced2d956.tar.gz
bcm5719-llvm-6c2a4f5ff93e16c3b86c18543e02a193ced2d956.zip
[TTI][LV] preferPredicateOverEpilogue
There are currently two ways to force the creation of a predicated vector body instead of a scalar epilogue: 1) a command-line option and 2) a pragma. This adds a third mechanism: a target hook in TargetTransformInfo that can be queried for whether predication is preferred, which allows the vectoriser to make the decision itself rather than being forced. While this change is non-functional for now, it shows the required TTI plumbing, the usage of the new hook in the vectoriser, and the beginning of an ARM MVE implementation.
I will follow up on this with:
- a complete MVE implementation, see D69845.
- a patch to disable this, i.e. we should respect "vector_predicate(disable)" and its corresponding loop hint.
Differential Revision: https://reviews.llvm.org/D69040
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Analysis/TargetTransformInfo.cpp 6
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp 44
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetTransformInfo.h 7
-rw-r--r-- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp 18
4 files changed, 70 insertions, 5 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index ba89a9eebdb..0b409840351 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -243,6 +243,12 @@ bool TargetTransformInfo::isHardwareLoopProfitable(
return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
+bool TargetTransformInfo::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+ ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT, const LoopAccessInfo *LAI) const {
+ return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
+}
+
void TargetTransformInfo::getUnrollingPreferences(
Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
return TTIImpl->getUnrollingPreferences(L, SE, UP);
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index ed1d6e5ca36..eb698375985 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1000,6 +1000,50 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
return true;
}
+bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+ ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ const LoopAccessInfo *LAI) {
+ // Creating a predicated vector loop is the first step for generating a
+ // tail-predicated hardware loop, for which we need the MVE masked
+ // load/store instructions:
+ if (!ST->hasMVEIntegerOps())
+ return false;
+
+ HardwareLoopInfo HWLoopInfo(L);
+ if (!HWLoopInfo.canAnalyze(*LI)) {
+ LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
+ "analyzable.\n");
+ return false;
+ }
+
+ // This checks if we have the low-overhead branch architecture
+ // extension, and if we will create a hardware-loop:
+ if (!isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
+ LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
+ "profitable.\n");
+ return false;
+ }
+
+ if (!HWLoopInfo.isHardwareLoopCandidate(SE, *LI, *DT)) {
+ LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
+ "a candidate.\n");
+ return false;
+ }
+
+ // TODO: a tail-predicated loop is set up with the total number of
+ // elements processed by the loop, so we need to determine the element
+ // size here and check that it is uniform across all operations in the
+ // vector loop. Loops with narrowing/widening operations will therefore
+ // be rejected: we don't want to predicate their vector loop, and
+ // predication is the main prep step for tail-predicated loops.
+
+ return false;
+}
+
+
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// Only currently enable these preferences for M-Class cores.
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index c4e1a17d80c..5bb3bcaf10e 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -203,7 +203,12 @@ public:
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo);
-
+ bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
+ ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ const LoopAccessInfo *LAI);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9b6223cbbdc..f10f0f3320d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7423,13 +7423,18 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
static ScalarEpilogueLowering
getScalarEpilogueLowering(Function *F, Loop *L, LoopVectorizeHints &Hints,
- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
+ TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
+ AssumptionCache *AC, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ const LoopAccessInfo *LAI) {
ScalarEpilogueLowering SEL = CM_ScalarEpilogueAllowed;
if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
(F->hasOptSize() ||
llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)))
SEL = CM_ScalarEpilogueNotAllowedOptSize;
- else if (PreferPredicateOverEpilog || Hints.getPredicate())
+ else if (PreferPredicateOverEpilog || Hints.getPredicate() ||
+ TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, LAI))
SEL = CM_ScalarEpilogueNotNeededUsePredicate;
return SEL;
@@ -7449,7 +7454,10 @@ static bool processLoopInVPlanNativePath(
assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
Function *F = L->getHeader()->getParent();
InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI());
- ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI);
+
+ ScalarEpilogueLowering SEL =
+ getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, AC, LI,
+ PSE.getSE(), DT, LVL->getLAI());
LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F,
&Hints, IAI);
@@ -7541,7 +7549,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check the function attributes and profiles to find out if this function
// should be optimized for size.
- ScalarEpilogueLowering SEL = getScalarEpilogueLowering(F, L, Hints, PSI, BFI);
+ ScalarEpilogueLowering SEL =
+ getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, AC, LI,
+ PSE.getSE(), DT, LVL.getLAI());
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
OpenPOWER on IntegriCloud