diff options
Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r-- | llvm/lib/Transforms/Scalar/ConstantHoisting.cpp | 18 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp | 36 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 24 |
4 files changed, 64 insertions, 17 deletions
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index f1cc5e4de6e..683cf32eb30 100644 --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -41,6 +41,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/BasicBlock.h" @@ -60,6 +61,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -111,6 +113,7 @@ public: if (ConstHoistWithBlockFrequency) AU.addRequired<BlockFrequencyInfoWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); } @@ -126,6 +129,7 @@ INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist", "Constant Hoisting", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist", "Constant Hoisting", false, false) @@ -148,7 +152,8 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) { ConstHoistWithBlockFrequency ? &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI() : nullptr, - Fn.getEntryBlock()); + Fn.getEntryBlock(), + &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI()); if (MadeChange) { LLVM_DEBUG(dbgs() << "********** Function after Constant Hoisting: " @@ -548,7 +553,9 @@ ConstantHoistingPass::maximizeConstantsInRange(ConstCandVecType::iterator S, ConstCandVecType::iterator &MaxCostItr) { unsigned NumUses = 0; - if(!Entry->getParent()->hasOptSize() || std::distance(S,E) > 100) { + bool OptForSize = Entry->getParent()->hasOptSize() || + llvm::shouldOptimizeForSize(Entry->getParent(), PSI, BFI); + if (!OptForSize || std::distance(S,E) > 100) { for (auto ConstCand = S; ConstCand != E; ++ConstCand) { NumUses += ConstCand->Uses.size(); if (ConstCand->CumulativeCost > MaxCostItr->CumulativeCost) @@ -919,13 +926,14 @@ void ConstantHoistingPass::deleteDeadCastInst() const { /// Optimize expensive integer constants in the given function. bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, DominatorTree &DT, BlockFrequencyInfo *BFI, - BasicBlock &Entry) { + BasicBlock &Entry, ProfileSummaryInfo *PSI) { this->TTI = &TTI; this->DT = &DT; this->BFI = BFI; this->DL = &Fn.getParent()->getDataLayout(); this->Ctx = &Fn.getContext(); this->Entry = &Entry; + this->PSI = PSI; // Collect all constant candidates. collectConstantCandidates(Fn); @@ -962,7 +970,9 @@ PreservedAnalyses ConstantHoistingPass::run(Function &F, auto BFI = ConstHoistWithBlockFrequency ? &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; - if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock())) + auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager(); + auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); + if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock(), PSI)) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index 75266101123..68bb28d34d8 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -29,11 +29,14 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" @@ -54,6 +57,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include <algorithm> #include <cassert> #include <forward_list> @@ -159,8 +163,9 @@ namespace { class LoadEliminationForLoop { public: LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI, - DominatorTree *DT) - : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.getPSE()) {} + DominatorTree *DT, BlockFrequencyInfo *BFI, + ProfileSummaryInfo* PSI) + : L(L), LI(LI), LAI(LAI), DT(DT), BFI(BFI), PSI(PSI), PSE(LAI.getPSE()) {} /// Look through the loop-carried and loop-independent dependences in /// this loop and find store->load dependences. @@ -529,7 +534,11 @@ public: } if (!Checks.empty() || !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) { - if (L->getHeader()->getParent()->hasOptSize()) { + auto *HeaderBB = L->getHeader(); + auto *F = HeaderBB->getParent(); + bool OptForSize = F->hasOptSize() || + llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI); + if (OptForSize) { LLVM_DEBUG( dbgs() << "Versioning is needed but not allowed when optimizing " "for size.\n"); @@ -572,6 +581,8 @@ private: LoopInfo *LI; const LoopAccessInfo &LAI; DominatorTree *DT; + BlockFrequencyInfo *BFI; + ProfileSummaryInfo *PSI; PredicatedScalarEvolution PSE; }; @@ -579,6 +590,7 @@ private: static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, function_ref<const LoopAccessInfo &(Loop &)> GetLAI) { // Build up a worklist of inner-loops to transform to avoid iterator // invalidation. @@ -597,7 +609,7 @@ eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI, DominatorTree &DT, bool Changed = false; for (Loop *L : Worklist) { // The actual work is performed by LoadEliminationForLoop. - LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT); + LoadEliminationForLoop LEL(L, &LI, GetLAI(*L), &DT, BFI, PSI); Changed |= LEL.processLoop(); } return Changed; @@ -622,10 +634,14 @@ public: auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>(); auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? + &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : + nullptr; // Process each loop nest in the function. return eliminateLoadsAcrossLoops( - F, LI, DT, + F, LI, DT, BFI, PSI, [&LAA](Loop &L) -> const LoopAccessInfo & { return LAA.getInfo(&L); }); } @@ -638,6 +654,8 @@ public: AU.addRequired<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addRequired<ProfileSummaryInfoWrapperPass>(); + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); } }; @@ -653,6 +671,8 @@ INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false) FunctionPass *llvm::createLoopLoadEliminationPass() { @@ -668,13 +688,17 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F, auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); auto &AA = AM.getResult<AAManager>(F); auto &AC = AM.getResult<AssumptionAnalysis>(F); + auto &MAM = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager(); + auto *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? + &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; MemorySSA *MSSA = EnableMSSALoopDependency ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr; auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager(); bool Changed = eliminateLoadsAcrossLoops( - F, LI, DT, [&](Loop &L) -> const LoopAccessInfo & { + F, LI, DT, BFI, PSI, [&](Loop &L) -> const LoopAccessInfo & { LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; return LAM.getResult<LoopAccessAnalysis>(L, AR); }); diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 71e9953aed6..86891eb451b 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -294,7 +294,8 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, return LoopUnrollResult::Unmodified; TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, SE, TTI, OptLevel, None, None, None, None, None, None); + L, SE, TTI, nullptr, nullptr, OptLevel, + None, None, None, None, None, None); if (AllowUnrollAndJam.getNumOccurrences() > 0) UP.UnrollAndJam = AllowUnrollAndJam; if (UnrollAndJamThreshold.getNumOccurrences() > 0) diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 53d4ae84456..c113e4d3166 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -23,7 +23,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -55,6 +57,7 @@ #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include <algorithm> #include <cassert> @@ -165,7 +168,8 @@ static const unsigned NoThreshold = std::numeric_limits<unsigned>::max(); /// Gather the various unrolling parameters based on the defaults, compiler /// flags, TTI overrides and user specified parameters. TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( - Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, int OptLevel, + Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel, Optional<unsigned> UserThreshold, Optional<unsigned> UserCount, Optional<bool> UserAllowPartial, Optional<bool> UserRuntime, Optional<bool> UserUpperBound, Optional<bool> UserAllowPeeling) { @@ -198,7 +202,9 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences( TTI.getUnrollingPreferences(L, SE, UP); // Apply size attributes - if (L->getHeader()->getParent()->hasOptSize()) { + bool OptForSize = L->getHeader()->getParent()->hasOptSize() || + llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI); + if (OptForSize) { UP.Threshold = UP.OptSizeThreshold; UP.PartialThreshold = UP.PartialOptSizeThreshold; } @@ -963,7 +969,9 @@ bool llvm::computeUnrollCount( static LoopUnrollResult tryToUnrollLoop( Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, - OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, + OptimizationRemarkEmitter &ORE, + BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, + bool PreserveLCSSA, int OptLevel, bool OnlyWhenForced, bool ForgetAllSCEV, Optional<unsigned> ProvidedCount, Optional<unsigned> ProvidedThreshold, Optional<bool> ProvidedAllowPartial, Optional<bool> ProvidedRuntime, Optional<bool> ProvidedUpperBound, @@ -989,7 +997,7 @@ static LoopUnrollResult tryToUnrollLoop( bool NotDuplicatable; bool Convergent; TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, SE, TTI, OptLevel, ProvidedThreshold, ProvidedCount, + L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling); // Exit early if unrolling is disabled. @@ -1176,7 +1184,8 @@ public: bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); LoopUnrollResult Result = tryToUnrollLoop( - L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced, + L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, + PreserveLCSSA, OptLevel, OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling); @@ -1257,6 +1266,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, + /*BFI*/ nullptr, /*PSI*/ nullptr, /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced, /*ForgetAllSCEV*/ false, /*Count*/ None, /*Threshold*/ None, /*AllowPartial*/ false, @@ -1359,6 +1369,8 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager(); ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? + &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; bool Changed = false; @@ -1394,7 +1406,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, // The API here is quite complex to call and we allow to select some // flavors of unrolling during construction time (by setting UnrollOpts). LoopUnrollResult Result = tryToUnrollLoop( - &L, DT, &LI, SE, TTI, AC, ORE, + &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI, /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced, /*ForgetAllSCEV*/ false, /*Count*/ None, /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, |