summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/LoopVectorize.cpp')
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp37
1 files changed, 26 insertions, 11 deletions
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5612e956794..06b70b3aea6 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -88,6 +88,7 @@
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -134,6 +135,7 @@
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
@@ -1452,12 +1454,13 @@ struct LoopVectorize : public FunctionPass {
auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
- GetLAA, *ORE);
+ GetLAA, *ORE, PSI);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1483,6 +1486,7 @@ struct LoopVectorize : public FunctionPass {
AU.addPreserved<BasicAAWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
};
@@ -6054,6 +6058,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
@@ -7147,7 +7152,8 @@ static bool processLoopInVPlanNativePath(
Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT,
LoopVectorizationLegality *LVL, TargetTransformInfo *TTI,
TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, LoopVectorizeHints &Hints) {
+ OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints) {
assert(EnableVPlanNativePath && "VPlan-native path is disabled.");
Function *F = L->getHeader()->getParent();
@@ -7162,10 +7168,12 @@ static bool processLoopInVPlanNativePath(
// Get user vectorization factor.
const unsigned UserVF = Hints.getWidth();
- // Check the function attributes to find out if this function should be
- // optimized for size.
+ // Check the function attributes and profiles to find out if this function
+ // should be optimized for size.
bool OptForSize =
- Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasOptSize();
+ Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ (F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
// Plan how to best vectorize, return the best VF and its cost.
const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF);
@@ -7245,10 +7253,12 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- // Check the function attributes to find out if this function should be
- // optimized for size.
+ // Check the function attributes and profiles to find out if this function
+ // should be optimized for size.
bool OptForSize =
- Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasOptSize();
+ Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ (F->hasOptSize() ||
+ llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI));
// Entrance to the VPlan-native vectorization path. Outer loops are processed
// here. They may require CFG and instruction level transformations before
@@ -7257,7 +7267,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// pipeline.
if (!L->empty())
return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
- ORE, Hints);
+ ORE, BFI, PSI, Hints);
assert(L->empty() && "Inner loop expected.");
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
@@ -7523,7 +7533,7 @@ bool LoopVectorizePass::runImpl(
DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
- OptimizationRemarkEmitter &ORE_) {
+ OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) {
SE = &SE_;
LI = &LI_;
TTI = &TTI_;
@@ -7535,6 +7545,7 @@ bool LoopVectorizePass::runImpl(
GetLAA = &GetLAA_;
DB = &DB_;
ORE = &ORE_;
+ PSI = PSI_;
// Don't attempt if
// 1. the target claims to have no vector registers, and
@@ -7603,8 +7614,12 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
+ const ModuleAnalysisManager &MAM =
+ AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+ ProfileSummaryInfo *PSI =
+ MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
bool Changed =
- runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE);
+ runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE, PSI);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
OpenPOWER on IntegriCloud