From 09e539fcaebb6362795d352cdcf4a818cf4d0d6a Mon Sep 17 00:00:00 2001 From: Hiroshi Yamauchi Date: Mon, 15 Apr 2019 16:49:00 +0000 Subject: [PGO] Profile guided code size optimization. Summary: Enable some of the existing size optimizations for cold code under PGO. A ~5% code size saving in big internal app under PGO. The way it gets BFI/PSI is discussed in the RFC thread http://lists.llvm.org/pipermail/llvm-dev/2019-March/130894.html Note it doesn't currently touch loop passes. Reviewers: davidxl, eraman Reviewed By: eraman Subscribers: mgorny, javed.absar, smeenai, mehdi_amini, eraman, zzheng, steven_wu, dexonsmith, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D59514 llvm-svn: 358422 --- .../Transforms/InstCombine/InstCombineCalls.cpp | 2 +- .../Transforms/InstCombine/InstCombineInternal.h | 10 +++++--- .../InstCombine/InstructionCombining.cpp | 29 +++++++++++++++++++--- 3 files changed, 33 insertions(+), 8 deletions(-) (limited to 'llvm/lib/Transforms/InstCombine') diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 2cc2e824cfb..d8b188ab8f8 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4178,7 +4178,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) { auto InstCombineErase = [this](Instruction *I) { eraseInstFromFunction(*I); }; - LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW, + LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW, InstCombineErase); if (Value *With = Simplifier.optimizeCall(CI)) { ++NumSimplified; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index ee1853613bc..a42cf95b45a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -52,12 +52,14 @@ namespace llvm { class APInt; class AssumptionCache; +class BlockFrequencyInfo; class DataLayout; class DominatorTree; class GEPOperator; class GlobalVariable; class LoopInfo; class OptimizationRemarkEmitter; +class ProfileSummaryInfo; class TargetLibraryInfo; class User; @@ -304,6 +306,8 @@ private: const DataLayout &DL; const SimplifyQuery SQ; OptimizationRemarkEmitter &ORE; + BlockFrequencyInfo *BFI; + ProfileSummaryInfo *PSI; // Optional analyses. When non-null, these can both be used to do better // combining and will be updated to reflect any changes. @@ -315,11 +319,11 @@ public: InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, - OptimizationRemarkEmitter &ORE, const DataLayout &DL, - LoopInfo *LI) + OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, const DataLayout &DL, LoopInfo *LI) : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize), ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), - DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), LI(LI) {} + DL(DL), SQ(DL, &TLI, &DT, &AC), ORE(ORE), BFI(BFI), PSI(PSI), LI(LI) {} /// Run the combiner over the entire worklist until it is empty. /// diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 6ee94a928e5..7e26dee3452 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -46,14 +46,17 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -3478,7 +3481,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, static bool combineInstructionsOverFunction( Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, - OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true, + OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, bool ExpensiveCombines = true, LoopInfo *LI = nullptr) { auto &DL = F.getParent()->getDataLayout(); ExpensiveCombines |= EnableExpensiveCombines; @@ -3509,7 +3513,7 @@ static bool combineInstructionsOverFunction( MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA, - AC, TLI, DT, ORE, DL, LI); + AC, TLI, DT, ORE, BFI, PSI, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; if (!IC.run()) @@ -3529,8 +3533,15 @@ PreservedAnalyses InstCombinePass::run(Function &F, auto *LI = AM.getCachedResult(F); auto *AA = &AM.getResult(F); + const ModuleAnalysisManager &MAM = + AM.getResult(F).getManager(); + ProfileSummaryInfo *PSI = + MAM.getCachedResult(*F.getParent()); + auto *BFI = (PSI && PSI->hasProfileSummary()) ? + &AM.getResult(F) : nullptr; + if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, - ExpensiveCombines, LI)) + BFI, PSI, ExpensiveCombines, LI)) // No changes, all analyses are preserved. return PreservedAnalyses::all(); @@ -3554,6 +3565,8 @@ void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); } bool InstructionCombiningPass::runOnFunction(Function &F) { @@ -3570,9 +3583,15 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { // Optional analyses. auto *LIWP = getAnalysisIfAvailable(); auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; + ProfileSummaryInfo *PSI = + &getAnalysis().getPSI(); + BlockFrequencyInfo *BFI = + (PSI && PSI->hasProfileSummary()) ? + &getAnalysis().getBFI() : + nullptr; return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, - ExpensiveCombines, LI); + BFI, PSI, ExpensiveCombines, LI); } char InstructionCombiningPass::ID = 0; @@ -3585,6 +3604,8 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) -- cgit v1.2.3