diff options
| author | Easwaran Raman <eraman@google.com> | 2017-07-28 21:47:36 +0000 |
|---|---|---|
| committer | Easwaran Raman <eraman@google.com> | 2017-07-28 21:47:36 +0000 |
| commit | 51b809bf2f6e81be5d2390acaee09cbe1e26949c (patch) | |
| tree | 1c88273f523ecb1a3c4a72d67add5b6c3993a049 /llvm/lib | |
| parent | 3bcad2b2e671e3ef948a4d537143274ae1a89609 (diff) | |
| download | bcm5719-llvm-51b809bf2f6e81be5d2390acaee09cbe1e26949c.tar.gz bcm5719-llvm-51b809bf2f6e81be5d2390acaee09cbe1e26949c.zip | |
[Inliner] Do not apply any bonus for cold callsites.
Summary:
Inlining threshold is increased by application of bonuses when the
callee has a single reachable basic block or is rich in vector
instructions. Similarly, inlining cost is reduced by applying a large
bonus when the last call to a static function is considered for
inlining. This patch disables the application of these bonuses when the
callsite or the callee is cold. The intention here is to prevent a large
cold callsite from being inlined into a non-cold caller, since the resulting
size increase could prevent that caller itself from being inlined. This is especially important when the
cold callsite is a last call to a static since the associated bonus is
very high.
Reviewers: chandlerc, davidxl
Subscribers: danielcdh, llvm-commits
Differential Revision: https://reviews.llvm.org/D35823
llvm-svn: 309441
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Analysis/InlineCost.cpp | 103 |
1 files changed, 75 insertions, 28 deletions
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index 8be2ee7881a..f82c802c8a2 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -119,8 +119,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize; unsigned NumInstructions, NumVectorInstructions; - int FiftyPercentVectorBonus, TenPercentVectorBonus; - int VectorBonus; + int VectorBonus, TenPercentVectorBonus; + // Bonus to be applied when the callee has only one reachable basic block. + int SingleBBBonus; /// While we walk the potentially-inlined instructions, we build up and /// maintain a mapping of simplified values specific to this callsite. The @@ -235,11 +236,11 @@ public: ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), - NumVectorInstructions(0), FiftyPercentVectorBonus(0), - TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), - NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), - NumConstantPtrDiffs(0), NumInstructionsSimplified(0), - SROACostSavings(0), SROACostSavingsLost(0) {} + NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), + SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -678,11 +679,49 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { return B ? std::max(A, B.getValue()) : A; }; + // Various bonus percentages. These are multiplied by Threshold to get the + // bonus values. + // SingleBBBonus: This bonus is applied if the callee has a single reachable + // basic block at the given callsite context. 
This is speculatively applied + // and withdrawn if more than one basic block is seen. + // + // Vector bonuses: We want to more aggressively inline vector-dense kernels + // and apply this bonus based on the percentage of vector instructions. A + // bonus is applied if the vector instructions exceed 50% and half that amount + // is applied if it exceeds 10%. Note that these bonuses are some what + // arbitrary and evolved over time by accident as much as because they are + // principled bonuses. + // FIXME: It would be nice to base the bonus values on something more + // scientific. + // + // LstCallToStaticBonus: This large bonus is applied to ensure the inlining + // of the last call to a static function as inlining such functions is + // guaranteed to reduce code size. + // + // These bonus percentages may be set to 0 based on properties of the caller + // and the callsite. + int SingleBBBonusPercent = 50; + int VectorBonusPercent = 150; + int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus; + + // Lambda to set all the above bonus and bonus percentages to 0. + auto DisallowAllBonuses = [&]() { + SingleBBBonusPercent = 0; + VectorBonusPercent = 0; + LastCallToStaticBonus = 0; + }; + // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available // and reduce the threshold if the caller has the necessary attribute. - if (Caller->optForMinSize()) + if (Caller->optForMinSize()) { Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold); - else if (Caller->optForSize()) + // For minsize, we want to disable the single BB bonus and the vector + // bonuses, but not the last-call-to-static bonus. Inlining the last call to + // a static function will, at the minimum, eliminate the parameter setup and + // call/return instructions. 
+ SingleBBBonusPercent = 0; + VectorBonusPercent = 0; + } else if (Caller->optForSize()) Threshold = MinIfValid(Threshold, Params.OptSizeThreshold); // Adjust the threshold based on inlinehint attribute and profile based @@ -706,6 +745,11 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { Threshold = Params.HotCallSiteThreshold.getValue(); } else if (isColdCallSite(CS, CallerBFI)) { DEBUG(dbgs() << "Cold callsite.\n"); + // Do not apply bonuses for a cold callsite including the + // LastCallToStatic bonus. While this bonus might result in code size + // reduction, it can cause the size of a non-cold caller to increase + // preventing it from being inlined. + DisallowAllBonuses(); Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); } } else { @@ -717,6 +761,11 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { Threshold = MaxIfValid(Threshold, Params.HintThreshold); } else if (PSI->isFunctionEntryCold(&Callee)) { DEBUG(dbgs() << "Cold callee.\n"); + // Do not apply bonuses for a cold callee including the + // LastCallToStatic bonus. While this bonus might result in code size + // reduction, it can cause the size of a non-cold caller to increase + // preventing it from being inlined. + DisallowAllBonuses(); Threshold = MinIfValid(Threshold, Params.ColdThreshold); } } @@ -726,6 +775,17 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) { // Finally, take the target-specific inlining threshold multiplier into // account. Threshold *= TTI.getInliningThresholdMultiplier(); + + SingleBBBonus = Threshold * SingleBBBonusPercent / 100; + VectorBonus = Threshold * VectorBonusPercent / 100; + + bool OnlyOneCallAndLocalLinkage = + F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); + // If there is only one call of the function, and it has internal linkage, + // the cost of inlining it drops dramatically. 
It may seem odd to update + // Cost in updateThreshold, but the bonus depends on the logic in this method. + if (OnlyOneCallAndLocalLinkage) + Cost -= LastCallToStaticBonus; } bool CallAnalyzer::visitCmpInst(CmpInst &I) { @@ -1295,31 +1355,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // Update the threshold based on callsite properties updateThreshold(CS, F); - FiftyPercentVectorBonus = 3 * Threshold / 2; - TenPercentVectorBonus = 3 * Threshold / 4; - - // Track whether the post-inlining function would have more than one basic - // block. A single basic block is often intended for inlining. Balloon the - // threshold by 50% until we pass the single-BB phase. - bool SingleBB = true; - int SingleBBBonus = Threshold / 2; - // Speculatively apply all possible bonuses to Threshold. If cost exceeds // this Threshold any time, and cost cannot decrease, we can stop processing // the rest of the function body. - Threshold += (SingleBBBonus + FiftyPercentVectorBonus); + Threshold += (SingleBBBonus + VectorBonus); // Give out bonuses for the callsite, as the instructions setting them up // will be gone after inlining. Cost -= getCallsiteCost(CS, DL); - // If there is only one call of the function, and it has internal linkage, - // the cost of inlining it drops dramatically. - bool OnlyOneCallAndLocalLinkage = - F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); - if (OnlyOneCallAndLocalLinkage) - Cost -= InlineConstants::LastCallToStaticBonus; - // If this function uses the coldcc calling convention, prefer not to inline // it. if (F.getCallingConv() == CallingConv::Cold) @@ -1387,6 +1431,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { BBSetVector; BBSetVector BBWorklist; BBWorklist.insert(&F.getEntryBlock()); + bool SingleBB = true; // Note that we *must not* cache the size, this loop grows the worklist. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { // Bail out the moment we cross the threshold. 
This means we'll under-count @@ -1451,6 +1496,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { } } + bool OnlyOneCallAndLocalLinkage = + F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); // If this is a noduplicate call, we can still inline as long as // inlining this would cause the removal of the caller (so the instruction // is not actually duplicated, just moved). @@ -1461,9 +1508,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // subtract the excess bonus, if any, from the Threshold before // comparing against Cost. if (NumVectorInstructions <= NumInstructions / 10) - Threshold -= FiftyPercentVectorBonus; + Threshold -= VectorBonus; else if (NumVectorInstructions <= NumInstructions / 2) - Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus); + Threshold -= VectorBonus/2; return Cost < std::max(1, Threshold); } |

