summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorEaswaran Raman <eraman@google.com>2017-07-28 21:47:36 +0000
committerEaswaran Raman <eraman@google.com>2017-07-28 21:47:36 +0000
commit51b809bf2f6e81be5d2390acaee09cbe1e26949c (patch)
tree1c88273f523ecb1a3c4a72d67add5b6c3993a049 /llvm/lib
parent3bcad2b2e671e3ef948a4d537143274ae1a89609 (diff)
downloadbcm5719-llvm-51b809bf2f6e81be5d2390acaee09cbe1e26949c.tar.gz
bcm5719-llvm-51b809bf2f6e81be5d2390acaee09cbe1e26949c.zip
[Inliner] Do not apply any bonus for cold callsites.
Summary: Inlining threshold is increased by application of bonuses when the callee has a single reachable basic block or is rich in vector instructions. Similarly, inlining cost is reduced by applying a large bonus when the last call to a static function is considered for inlining. This patch disables the application of these bonuses when the callsite or the callee is cold. The intention here is to prevent a large cold callsite from being inlined into a non-cold caller, which could in turn prevent the caller itself from being inlined. This is especially important when the cold callsite is the last call to a static function, since the associated bonus is very high. Reviewers: chandlerc, davidxl Subscribers: danielcdh, llvm-commits Differential Revision: https://reviews.llvm.org/D35823 llvm-svn: 309441
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Analysis/InlineCost.cpp103
1 files changed, 75 insertions, 28 deletions
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 8be2ee7881a..f82c802c8a2 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -119,8 +119,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
unsigned NumInstructions, NumVectorInstructions;
- int FiftyPercentVectorBonus, TenPercentVectorBonus;
- int VectorBonus;
+ int VectorBonus, TenPercentVectorBonus;
+ // Bonus to be applied when the callee has only one reachable basic block.
+ int SingleBBBonus;
/// While we walk the potentially-inlined instructions, we build up and
/// maintain a mapping of simplified values specific to this callsite. The
@@ -235,11 +236,11 @@ public:
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
- NumVectorInstructions(0), FiftyPercentVectorBonus(0),
- TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
- NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
- NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
- SROACostSavings(0), SROACostSavingsLost(0) {}
+ NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0),
+ SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -678,11 +679,49 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
return B ? std::max(A, B.getValue()) : A;
};
+ // Various bonus percentages. These are multiplied by Threshold to get the
+ // bonus values.
+ // SingleBBBonus: This bonus is applied if the callee has a single reachable
+ // basic block at the given callsite context. This is speculatively applied
+ // and withdrawn if more than one basic block is seen.
+ //
+ // Vector bonuses: We want to more aggressively inline vector-dense kernels
+ // and apply this bonus based on the percentage of vector instructions. The
+ // full bonus is applied if vector instructions exceed 50% of all instructions
+ // and half that amount is applied if they exceed 10%. Note that these bonuses
+ // are somewhat arbitrary and evolved over time by accident as much as because
+ // they are principled bonuses.
+ // FIXME: It would be nice to base the bonus values on something more
+ // scientific.
+ //
+ // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
+ // of the last call to a static function as inlining such functions is
+ // guaranteed to reduce code size.
+ //
+ // These bonus percentages may be set to 0 based on properties of the caller
+ // and the callsite.
+ int SingleBBBonusPercent = 50;
+ int VectorBonusPercent = 150;
+ int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
+
+ // Lambda to set all the above bonuses and bonus percentages to 0.
+ auto DisallowAllBonuses = [&]() {
+ SingleBBBonusPercent = 0;
+ VectorBonusPercent = 0;
+ LastCallToStaticBonus = 0;
+ };
+
// Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
// and reduce the threshold if the caller has the necessary attribute.
- if (Caller->optForMinSize())
+ if (Caller->optForMinSize()) {
Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
- else if (Caller->optForSize())
+ // For minsize, we want to disable the single BB bonus and the vector
+ // bonuses, but not the last-call-to-static bonus. Inlining the last call to
+ // a static function will, at the minimum, eliminate the parameter setup and
+ // call/return instructions.
+ SingleBBBonusPercent = 0;
+ VectorBonusPercent = 0;
+ } else if (Caller->optForSize())
Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
// Adjust the threshold based on inlinehint attribute and profile based
@@ -706,6 +745,11 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
Threshold = Params.HotCallSiteThreshold.getValue();
} else if (isColdCallSite(CS, CallerBFI)) {
DEBUG(dbgs() << "Cold callsite.\n");
+ // Do not apply bonuses for a cold callsite including the
+ // LastCallToStatic bonus. While this bonus might result in code size
+ // reduction, it can cause the size of a non-cold caller to increase
+ // preventing it from being inlined.
+ DisallowAllBonuses();
Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
}
} else {
@@ -717,6 +761,11 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
Threshold = MaxIfValid(Threshold, Params.HintThreshold);
} else if (PSI->isFunctionEntryCold(&Callee)) {
DEBUG(dbgs() << "Cold callee.\n");
+ // Do not apply bonuses for a cold callee including the
+ // LastCallToStatic bonus. While this bonus might result in code size
+ // reduction, it can cause the size of a non-cold caller to increase
+ // preventing it from being inlined.
+ DisallowAllBonuses();
Threshold = MinIfValid(Threshold, Params.ColdThreshold);
}
}
@@ -726,6 +775,17 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// Finally, take the target-specific inlining threshold multiplier into
// account.
Threshold *= TTI.getInliningThresholdMultiplier();
+
+ SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
+ VectorBonus = Threshold * VectorBonusPercent / 100;
+
+ bool OnlyOneCallAndLocalLinkage =
+ F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
+ // If there is only one call of the function, and it has internal linkage,
+ // the cost of inlining it drops dramatically. It may seem odd to update
+ // Cost in updateThreshold, but the bonus depends on the logic in this method.
+ if (OnlyOneCallAndLocalLinkage)
+ Cost -= LastCallToStaticBonus;
}
bool CallAnalyzer::visitCmpInst(CmpInst &I) {
@@ -1295,31 +1355,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// Update the threshold based on callsite properties
updateThreshold(CS, F);
- FiftyPercentVectorBonus = 3 * Threshold / 2;
- TenPercentVectorBonus = 3 * Threshold / 4;
-
- // Track whether the post-inlining function would have more than one basic
- // block. A single basic block is often intended for inlining. Balloon the
- // threshold by 50% until we pass the single-BB phase.
- bool SingleBB = true;
- int SingleBBBonus = Threshold / 2;
-
// Speculatively apply all possible bonuses to Threshold. If cost exceeds
// this Threshold any time, and cost cannot decrease, we can stop processing
// the rest of the function body.
- Threshold += (SingleBBBonus + FiftyPercentVectorBonus);
+ Threshold += (SingleBBBonus + VectorBonus);
// Give out bonuses for the callsite, as the instructions setting them up
// will be gone after inlining.
Cost -= getCallsiteCost(CS, DL);
- // If there is only one call of the function, and it has internal linkage,
- // the cost of inlining it drops dramatically.
- bool OnlyOneCallAndLocalLinkage =
- F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
- if (OnlyOneCallAndLocalLinkage)
- Cost -= InlineConstants::LastCallToStaticBonus;
-
// If this function uses the coldcc calling convention, prefer not to inline
// it.
if (F.getCallingConv() == CallingConv::Cold)
@@ -1387,6 +1431,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
BBSetVector;
BBSetVector BBWorklist;
BBWorklist.insert(&F.getEntryBlock());
+ bool SingleBB = true;
// Note that we *must not* cache the size, this loop grows the worklist.
for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
// Bail out the moment we cross the threshold. This means we'll under-count
@@ -1451,6 +1496,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
}
}
+ bool OnlyOneCallAndLocalLinkage =
+ F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
// If this is a noduplicate call, we can still inline as long as
// inlining this would cause the removal of the caller (so the instruction
// is not actually duplicated, just moved).
@@ -1461,9 +1508,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// subtract the excess bonus, if any, from the Threshold before
// comparing against Cost.
if (NumVectorInstructions <= NumInstructions / 10)
- Threshold -= FiftyPercentVectorBonus;
+ Threshold -= VectorBonus;
else if (NumVectorInstructions <= NumInstructions / 2)
- Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus);
+ Threshold -= VectorBonus/2;
return Cost < std::max(1, Threshold);
}
OpenPOWER on IntegriCloud