diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Passes/PassBuilder.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineInternal.h | 10 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 29 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 18 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp | 36 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 24 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/SizeOpts.cpp | 37 | ||||
-rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 37 |
12 files changed, 180 insertions, 39 deletions
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index b26ee96092d..c5fd68299eb 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -575,8 +575,12 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, Options.DoCounterPromotion = true; Options.UseBFIInPromotion = IsCS; MPM.addPass(InstrProfiling(Options, IsCS)); - } else if (!ProfileFile.empty()) + } else if (!ProfileFile.empty()) { MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); + // Cache ProfileSummaryAnalysis once to avoid the potential need to insert + // RequireAnalysisPass for PSI before subsequent non-module passes. + MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); + } } static InlineParams @@ -649,6 +653,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase == ThinLTOPhase::PreLink)); + // Cache ProfileSummaryAnalysis once to avoid the potential need to insert + // RequireAnalysisPass for PSI before subsequent non-module passes. + MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard // for the profile annotation to be accurate in the ThinLTO backend. if (Phase != ThinLTOPhase::PreLink) @@ -1065,6 +1072,9 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging, MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, false /* ThinLTOPhase::PreLink */)); + // Cache ProfileSummaryAnalysis once to avoid the potential need to insert + // RequireAnalysisPass for PSI before subsequent non-module passes. + MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); } // Remove unused virtual tables to improve the quality of code generated by diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 2cc2e824cfb..d8b188ab8f8 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4178,7 +4178,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) { auto InstCombineErase = [this](Instruction *I) { eraseInstFromFunction(*I); }; - LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW, + LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW, InstCombineErase); if (Value *With = Simplifier.optimizeCall(CI)) { ++NumSimplified; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index ee1853613bc..a42cf95b45a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -52,12 +52,14 @@ namespace llvm { class APInt; class AssumptionCache; +class BlockFrequencyInfo; class DataLayout; class DominatorTree; class GEPOperator; class GlobalVariable; class LoopInfo; class OptimizationRemarkEmitter; +class ProfileSummaryInfo; class TargetLibraryInfo; class User; @@ -304,6 +306,8 @@ private: const DataLayout &DL; const SimplifyQuery SQ; OptimizationRemarkEmitter &ORE; + BlockFrequencyInfo *BFI; + ProfileSummaryInfo *PSI; // Optional analyses. When non-null, these can both be used to do better // combining and will be updated to reflect any changes. @@ -315,11 +319,11 @@ public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, - OptimizationRemarkEmitter &ORE, const DataLayout &DL, - LoopInfo *LI) + OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {} + DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {} /// Run the combiner over the entire worklist until it is empty. /// diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 6ee94a928e5..7e26dee3452 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -46,14 +46,17 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -3478,7 +3481,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, static bool combineInstructionsOverFunction( Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, - OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true, + OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, bool ExpensiveCombines = true, LoopInfo *LI = nullptr) { auto &DL = F.getParent()->getDataLayout(); ExpensiveCombines |= EnableExpensiveCombines; @@ -3509,7 +3513,7 @@ static bool combineInstructionsOverFunction( MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA, - AC, TLI, DT, ORE, DL, LI); + AC, TLI, DT, ORE, BFI, PSI, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; if (!IC.run()) @@ -3529,8 +3533,15 @@ PreservedAnalyses InstCombinePass::run(Function &F, auto *LI = AM.getCachedResult<LoopAnalysis>(F); auto *AA = &AM.getResult<AAManager>(F); + const ModuleAnalysisManager &MAM = + AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager(); + ProfileSummaryInfo *PSI = + MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? + &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; + if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, - ExpensiveCombines, LI)) + BFI, PSI, ExpensiveCombines, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3554,6 +3565,8 @@ void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<AAResultsWrapperPass>(); AU.addPreserved<BasicAAWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); } bool InstructionCombiningPass::runOnFunction(Function &F) { @@ -3570,9 +3583,15 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { // Optional analyses. auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + ProfileSummaryInfo *PSI = + &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + BlockFrequencyInfo *BFI = + (PSI && PSI->hasProfileSummary()) ? + &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : + nullptr; return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, - ExpensiveCombines, LI); + BFI, PSI, ExpensiveCombines, LI); } char InstructionCombiningPass::ID = 0; @@ -3585,6 +3604,8 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index f1cc5e4de6e..683cf32eb30 100644 --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -41,6 +41,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/BasicBlock.h" @@ -60,6 +61,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -111,6 +113,7 @@ public: if (ConstHoistWithBlockFrequency) AU.addRequired<BlockFrequencyInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); } @@ -126,6 +129,7 @@ INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist", "Constant Hoisting", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist", "Constant Hoisting", false, false) @@ -148,7 +152,8 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) { ConstHoistWithBlockFrequency ? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI() : nullptr, - Fn.getEntryBlock()); + Fn.getEntryBlock(), + &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI()); if (MadeChange) { LLVM_DEBUG(dbgs() << "********** Function after Constant Hoisting: " @@ -548,7 +553,9 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S, ConstCandVecType::iterator &MaxCostItr) { unsigned NumUses = 0; - if(!Entry->getParent()->hasOptSize() || std::distance(S,E) > 100) { + bool OptForSize = Entry->getParent()->hasOptSize() || + llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI); + if (!OptForSize || std::distance(S,E) > 100) { for (auto ConstCand = S; ConstCand != E; ++ConstCand) { NumUses += ConstCand->Uses.size(); if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost) @@ -919,13 +926,14 @@ void ConstantHoistingPass::deleteDeadCastInst() const { /// Optimize expensive integer constants in the given function. bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, DominatorTree &DT, BlockFrequencyInfo *BFI, - BasicBlock &Entry) { + BasicBlock &Entry, ProfileSummaryInfo *PSI) { this->TTI = &TTI; this->DT = &DT; this->BFI = BFI; this->DL = &Fn.getParent()->getDataLayout(); this->Ctx = &Fn.getContext(); this->Entry = &Entry; + this->PSI = PSI; // Collect all constant candidates. collectConstantCandidates(Fn); @@ -962,7 +970,9 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F, auto BFI = ConstHoistWithBlockFrequency ? &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; - if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock())) + auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager(); + auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); + if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock(), PSI)) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index 75266101123..68bb28d34d8 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -29,11 +29,14 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -54,6 +57,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> #include <forward_list> @@ -159,8 +163,9 @@ namespace { class LoadEliminationForLoop { public: LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI, - DominatorTree *DT) - : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {} + DominatorTree *DT, BlockFrequencyInfo *BFI, + ProfileSummaryInfo* PSI) + : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {} /// Look through the loop-carried and loop-independent dependences in /// this loop and find store->load dependences. @@ -529,7 +534,11 @@ public: } if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) { - if (L->getHeader()->getParent()->hasOptSize()) { + auto *HeaderBB = L->getHeader(); + auto *F = HeaderBB->getParent(); + bool OptForSize = F->hasOptSize() || + llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI); + if (OptForSize) { LLVM_DEBUG( dbgs() << "Versioning is needed but not allowed when optimizing " "for size.\n"); @@ -572,6 +581,8 @@ private: LoopInfo *LI; const LoopAccessInfo &LAI; DominatorTree *DT; + BlockFrequencyInfo *BFI; + ProfileSummaryInfo *PSI; PredicatedScalarEvolution PSE; }; @@ -579,6 +590,7 @@ private: static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, function_ref<const LoopAccessInfo &(Loop &)> GetLAI) { // Build up a worklist of inner-loops to transform to avoid iterator // invalidation. @@ -597,7 +609,7 @@ eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, bool Changed = false; for (Loop *L : Worklist) { // The actual work is performed by LoadEliminationForLoop. - LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT); + LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI); Changed |= LEL.processLoop(); } return Changed; @@ -622,10 +634,14 @@ public: auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>(); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? + &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : + nullptr; // Process each loop nest in the function. return eliminateLoadsAcrossLoops( - F, LI, DT, + F, LI, DT, BFI, PSI, [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); }); } @@ -638,6 +654,8 @@ public: AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); } }; @@ -653,6 +671,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false) FunctionPass *llvm::createLoopLoadEliminationPass() { @@ -668,13 +688,17 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F, auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); auto &AC = AM.getResult<AssumptionAnalysis>(F); + auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager(); + auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? + &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; MemorySSA *MSSA = EnableMSSALoopDependency ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr; auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); bool Changed = eliminateLoadsAcrossLoops( - F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & { + F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & { LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; return LAM.getResult<LoopAccessAnalysis>(L, AR); }); diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 71e9953aed6..86891eb451b 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -294,7 +294,8 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, return LoopUnrollResult::Unmodified; TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, SE, TTI, OptLevel, None, None, None, None, None, None); + L, SE, TTI, nullptr, nullptr, OptLevel, + None, None, None, None, None, None); if (AllowUnrollAndJam.getNumOccurrences() > 0) UP.UnrollAndJam = AllowUnrollAndJam; if (UnrollAndJamThreshold.getNumOccurrences() > 0) diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 53d4ae84456..c113e4d3166 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -23,7 +23,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -55,6 +57,7 @@ #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include <algorithm> #include <cassert> @@ -165,7 +168,8 @@ static const unsigned NoThreshold = std::numeric_limits<unsigned>::max(); /// Gather the various unrolling parameters based on the defaults, compiler /// flags, TTI overrides and user specified parameters. TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( - Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, + Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional<unsigned> UserThreshold, Optional<unsigned> UserCount, Optional<bool> UserAllowPartial, Optional<bool> UserRuntime, Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) { @@ -198,7 +202,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( TTI.getUnrollingPreferences(L, SE, UP); // Apply size attributes - if (L->getHeader()->getParent()->hasOptSize()) { + bool OptForSize = L->getHeader()->getParent()->hasOptSize() || + llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI); + if (OptForSize) { UP.Threshold = UP.OptSizeThreshold; UP.PartialThreshold = UP.PartialOptSizeThreshold; } @@ -963,7 +969,9 @@ bool llvm::computeUnrollCount( static LoopUnrollResult tryToUnrollLoop( Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, - OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, + OptimizationRemarkEmitter &ORE, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, + bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound, @@ -989,7 +997,7 @@ static LoopUnrollResult tryToUnrollLoop( bool NotDuplicatable; bool Convergent; TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount, + L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling); // Exit early if unrolling is disabled. @@ -1176,7 +1184,8 @@ public: bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); LoopUnrollResult Result = tryToUnrollLoop( - L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced, + L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, + PreserveLCSSA, OptLevel, OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling); @@ -1257,6 +1266,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, + /*BFI*/ nullptr, /*PSI*/ nullptr, /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced, /*ForgetAllSCEV*/ false, /*Count*/ None, /*Threshold*/ None, /*AllowPartial*/ false, @@ -1359,6 +1369,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager(); ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? + &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; bool Changed = false; @@ -1394,7 +1406,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, // The API here is quite complex to call and we allow to select some // flavors of unrolling during construction time (by setting UnrollOpts). LoopUnrollResult Result = tryToUnrollLoop( - &L, DT, &LI, SE, TTI, AC, ORE, + &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI, /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced, /*ForgetAllSCEV*/ false, /*Count*/ None, /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt index cb3dc17c03a..c232aa6223c 100644 --- a/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -51,6 +51,7 @@ add_llvm_library(LLVMTransformUtils SimplifyCFG.cpp SimplifyIndVar.cpp SimplifyLibCalls.cpp + SizeOpts.cpp SplitModule.cpp StripNonLineTableDebugInfo.cpp SymbolRewriter.cpp diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index f35328ca8d4..23b88dbda26 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -16,8 +16,10 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" @@ -34,6 +36,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/SizeOpts.h" using namespace llvm; using namespace PatternMatch; @@ -2375,7 +2378,9 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) { // Don't rewrite fputs to fwrite when optimising for size because fwrite // requires more arguments and thus extra MOVs are required. - if (CI->getFunction()->hasOptSize()) + bool OptForSize = CI->getFunction()->hasOptSize() || + llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); + if (OptForSize) return nullptr; // Check if has any use @@ -2750,9 +2755,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { LibCallSimplifier::LibCallSimplifier( const DataLayout &DL, const TargetLibraryInfo *TLI, OptimizationRemarkEmitter &ORE, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, function_ref<void(Instruction *, Value *)> Replacer, function_ref<void(Instruction *)> Eraser) - : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), + : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI), UnsafeFPShrink(false), Replacer(Replacer), Eraser(Eraser) {} void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp new file mode 100644 index 00000000000..1519751197d --- /dev/null +++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp @@ -0,0 +1,37 @@ +//===-- SizeOpts.cpp - code size optimization related code ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains some shared code size optimization related code. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/SizeOpts.h" +using namespace llvm; + +static cl::opt<bool> ProfileGuidedSizeOpt( + "pgso", cl::Hidden, cl::init(true), + cl::desc("Enable the profile guided size optimization. ")); + +bool llvm::shouldOptimizeForSize(Function *F, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) { + assert(F); + if (!PSI || !BFI || !PSI->hasProfileSummary()) + return false; + return ProfileGuidedSizeOpt && PSI->isFunctionColdInCallGraph(F, *BFI); +} + +bool llvm::shouldOptimizeForSize(BasicBlock *BB, ProfileSummaryInfo *PSI, + BlockFrequencyInfo *BFI) { + assert(BB); + if (!PSI || !BFI || !PSI->hasProfileSummary()) + return false; + return ProfileGuidedSizeOpt && PSI->isColdBlock(BB, BFI); +} diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5612e956794..06b70b3aea6 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -88,6 +88,7 @@ #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -134,6 +135,7 @@ #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" #include <algorithm> #include <cassert> @@ -1452,12 +1454,13 @@ struct LoopVectorize : public FunctionPass { auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>(); auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits(); auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); + auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); std::function<const LoopAccessInfo &(Loop &)> GetLAA = [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); }; return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC, - GetLAA, *ORE); + GetLAA, *ORE, PSI); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -1483,6 +1486,7 @@ struct LoopVectorize : public FunctionPass { AU.addPreserved<BasicAAWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); } }; @@ -6054,6 +6058,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { @@ -7147,7 +7152,8 @@ static bool processLoopInVPlanNativePath( Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT, LoopVectorizationLegality *LVL, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, - OptimizationRemarkEmitter *ORE, LoopVectorizeHints &Hints) { + OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints) { assert(EnableVPlanNativePath && "VPlan-native path is disabled."); Function *F = L->getHeader()->getParent(); @@ -7162,10 +7168,12 @@ static bool processLoopInVPlanNativePath( // Get user vectorization factor. const unsigned UserVF = Hints.getWidth(); - // Check the function attributes to find out if this function should be - // optimized for size. + // Check the function attributes and profiles to find out if this function + // should be optimized for size. bool OptForSize = - Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasOptSize(); + Hints.getForce() != LoopVectorizeHints::FK_Enabled && + (F->hasOptSize() || + llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)); // Plan how to best vectorize, return the best VF and its cost. const VectorizationFactor VF = LVP.planInVPlanNativePath(OptForSize, UserVF); @@ -7245,10 +7253,12 @@ bool LoopVectorizePass::processLoop(Loop *L) { return false; } - // Check the function attributes to find out if this function should be - // optimized for size. + // Check the function attributes and profiles to find out if this function + // should be optimized for size. bool OptForSize = - Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasOptSize(); + Hints.getForce() != LoopVectorizeHints::FK_Enabled && + (F->hasOptSize() || + llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI)); // Entrance to the VPlan-native vectorization path. Outer loops are processed // here. They may require CFG and instruction level transformations before @@ -7257,7 +7267,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // pipeline. if (!L->empty()) return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC, - ORE, Hints); + ORE, BFI, PSI, Hints); assert(L->empty() && "Inner loop expected."); // Check the loop for a trip count threshold: vectorize loops with a tiny trip @@ -7523,7 +7533,7 @@ bool LoopVectorizePass::runImpl( DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_, std::function<const LoopAccessInfo &(Loop &)> &GetLAA_, - OptimizationRemarkEmitter &ORE_) { + OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) { SE = &SE_; LI = &LI_; TTI = &TTI_; @@ -7535,6 +7545,7 @@ bool LoopVectorizePass::runImpl( GetLAA = &GetLAA_; DB = &DB_; ORE = &ORE_; + PSI = PSI_; // Don't attempt if // 1. the target claims to have no vector registers, and @@ -7603,8 +7614,12 @@ PreservedAnalyses LoopVectorizePass::run(Function &F, LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; return LAM.getResult<LoopAccessAnalysis>(L, AR); }; + const ModuleAnalysisManager &MAM = + AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager(); + ProfileSummaryInfo *PSI = + MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); bool Changed = - runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE); + runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AA, AC, GetLAA, ORE, PSI); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; |