diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Analysis/InlineCost.cpp | 54 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/InlineSimple.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/Inliner.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/PartialInlining.cpp | 2 |
4 files changed, 48 insertions, 20 deletions
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 1567a6863b2..c26388a19bc 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -82,6 +82,11 @@ static cl::opt<int> HotCallSiteRelFreq( "entry frequency, for a callsite to be hot in the absence of " "profile information.")); +static cl::opt<bool> ComputeFullInlineCost( + "inline-cost-full", cl::Hidden, cl::init(false), + cl::desc("Compute the full inline cost of a call site even when the cost " + "exceeds the threshold.")); + namespace { class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { @@ -106,6 +111,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { // Cache the DataLayout since we use it a lot. const DataLayout &DL; + /// The OptimizationRemarkEmitter available for this compilation. + OptimizationRemarkEmitter *ORE; + /// The candidate callsite being analyzed. Please do not use this to do /// analysis in the caller function; we want the inline cost query to be /// easily cacheable. Instead, use the cover function paramHasAttr. @@ -243,10 +251,10 @@ public: CallAnalyzer(const TargetTransformInfo &TTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI, - ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg, - const InlineParams &Params) + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, + Function &Callee, CallSite CSArg, const InlineParams &Params) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), - PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), + PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), @@ -1138,7 +1146,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { // out. Pretend to inline the function, with a custom threshold. auto IndirectCallParams = Params; IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; - CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, *F, CS, + CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, CS, IndirectCallParams); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the @@ -1198,7 +1206,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { std::min((int64_t)CostUpperBound, (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost); - if (CostLowerBound > Threshold) { + if (CostLowerBound > Threshold && !ComputeFullInlineCost) { Cost = CostLowerBound; return false; } @@ -1347,21 +1355,36 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, else Cost += InlineConstants::InstrCost; + using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || - HasIndirectBr || HasFrameEscape) + HasIndirectBr || HasFrameEscape) { + if (ORE) + ORE->emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", + CandidateCS.getInstruction()) + << NV("Callee", &F) + << " has uninlinable pattern and cost is not fully computed"); return false; + } // If the caller is a recursive function then we don't want to inline // functions which allocate a lot of stack space because it would increase // the caller stack usage dramatically. if (IsCallerRecursive && - AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) { + if (ORE) + ORE->emit( + OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", + CandidateCS.getInstruction()) + << NV("Callee", &F) + << " is recursive and allocates too much stack space. Cost is " + "not fully computed"); return false; + } // Check if we've past the maximum possible threshold so we don't spin in // huge basic blocks that will never inline. - if (Cost > Threshold) + if (Cost > Threshold && !ComputeFullInlineCost) return false; } @@ -1447,7 +1470,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { Cost += InlineConstants::ColdccPenalty; // Check if we're done. This can happen due to bonuses and penalties. - if (Cost > Threshold) + if (Cost > Threshold && !ComputeFullInlineCost) return false; if (F.empty()) @@ -1513,7 +1536,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. - if (Cost > Threshold) + if (Cost > Threshold && !ComputeFullInlineCost) break; BasicBlock *BB = BBWorklist[Idx]; @@ -1657,9 +1680,9 @@ InlineCost llvm::getInlineCost( CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI, - GetAssumptionCache, GetBFI, PSI); + GetAssumptionCache, GetBFI, PSI, ORE); } InlineCost llvm::getInlineCost( @@ -1667,7 +1690,7 @@ InlineCost llvm::getInlineCost( TargetTransformInfo &CalleeTTI, std::function<AssumptionCache &(Function &)> &GetAssumptionCache, Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { // Cannot inline indirect calls. if (!Callee) @@ -1699,10 +1722,13 @@ InlineCost llvm::getInlineCost( CS.isNoInline()) return llvm::InlineCost::getNever(); + if (ORE) + ComputeFullInlineCost = true; + DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "... (caller:" << Caller->getName() << ")\n"); - CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, *Callee, CS, + CallAnalyzer CA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Callee, CS, Params); bool ShouldInline = CA.analyzeCall(CS); diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp index 50e7cc89a3b..1af40d3dab8 100644 --- a/llvm/lib/Transforms/IPO/InlineSimple.cpp +++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp @@ -57,12 +57,13 @@ public: InlineCost getInlineCost(CallSite CS) override { Function *Callee = CS.getCalledFunction(); TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); + OptimizationRemarkEmitter ORE(CS.getCaller()); std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache, - /*GetBFI=*/None, PSI); + /*GetBFI=*/None, PSI, &ORE); } bool runOnSCC(CallGraphSCC &SCC) override; diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 10a83f48ff4..3c46913f8a8 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -845,6 +845,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, FunctionAnalysisManager &FAM = AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG) .getManager(); + + // Get the remarks emission analysis for the caller. + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); + std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult<AssumptionAnalysis>(F); @@ -857,12 +861,9 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, Function &Callee = *CS.getCalledFunction(); auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee); return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, {GetBFI}, - PSI); + PSI, &ORE); }; - // Get the remarks emission analysis for the caller. - auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - // Now process as many calls as we have within this caller in the sequnece. // We bail out as soon as the caller has to change so we can update the // call graph and prepare the context of that new caller. diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index 8840435af64..668a81203fc 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -462,7 +462,7 @@ bool PartialInlinerImpl::shouldPartialInline( Function *Caller = CS.getCaller(); auto &CalleeTTI = (*GetTTI)(*Callee); InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, - *GetAssumptionCache, GetBFI, PSI); + *GetAssumptionCache, GetBFI, PSI, &ORE); if (IC.isAlways()) { ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) |