summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Utils/InlineFunction.cpp
diff options
context:
space:
mode:
authorEaswaran Raman <eraman@google.com>2017-01-20 22:44:04 +0000
committerEaswaran Raman <eraman@google.com>2017-01-20 22:44:04 +0000
commit12585b0148a104dad35feed91f440836b2a97623 (patch)
treea006d63085616b0253c1e68b99116de08fce9c95 /llvm/lib/Transforms/Utils/InlineFunction.cpp
parent760ad4da6006422c5a6097232b5ffc72d6b906dd (diff)
downloadbcm5719-llvm-12585b0148a104dad35feed91f440836b2a97623.tar.gz
bcm5719-llvm-12585b0148a104dad35feed91f440836b2a97623.zip
Improve PGO support for the new inliner
This adds the following to the new PM based inliner in PGO mode: * Use block frequency analysis to derive callsite's profile count and use that to adjust thresholds of hot and cold callsites. * Incrementally update the BFI of the caller after a callee gets inlined into it. This incremental update is only within an invocation of the run method - BFI is not preserved across calls to run. Update the function entry count of the callee after inlining it into a caller. * I've tuned the thresholds for the hot and cold callsites using a hacked up version of the old inliner that explicitly computes BFI on a set of internal benchmarks and spec. Once the new PM based pipeline stabilizes (IIRC Chandler mentioned there are known issues) I'll benchmark this again and adjust the thresholds if required. Inliner PGO support. Differential revision: https://reviews.llvm.org/D28331 llvm-svn: 292666
Diffstat (limited to 'llvm/lib/Transforms/Utils/InlineFunction.cpp')
-rw-r--r--llvm/lib/Transforms/Utils/InlineFunction.cpp72
1 files changed, 68 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index a40079ca8e7..86f40c32dc0 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"
@@ -40,8 +41,8 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
-#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
using namespace llvm;
@@ -1393,6 +1394,56 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
}
}
}
+/// Update the block frequencies of the caller after a callee has been inlined.
+///
+/// Each block cloned into the caller has its block frequency scaled by the
+/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of
+/// callee's entry block gets the same frequency as the callsite block and the
+/// relative frequencies of all cloned blocks remain the same after cloning.
+static void updateCallerBFI(BasicBlock *CallSiteBlock,
+ const ValueToValueMapTy &VMap,
+ BlockFrequencyInfo *CallerBFI,
+ BlockFrequencyInfo *CalleeBFI,
+ const BasicBlock &CalleeEntryBlock) {
+ SmallPtrSet<BasicBlock *, 16> ClonedBBs;
+ for (auto const &Entry : VMap) {
+ if (!isa<BasicBlock>(Entry.first) || !Entry.second)
+ continue;
+ auto *OrigBB = cast<BasicBlock>(Entry.first);
+ auto *ClonedBB = cast<BasicBlock>(Entry.second);
+ ClonedBBs.insert(ClonedBB);
+ CallerBFI->setBlockFreq(ClonedBB,
+ CalleeBFI->getBlockFreq(OrigBB).getFrequency());
+ }
+ BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
+ CallerBFI->setBlockFreqAndScale(
+ EntryClone, CallerBFI->getBlockFreq(CallSiteBlock).getFrequency(),
+ ClonedBBs);
+}
+
+/// Update the entry count of callee after inlining.
+///
+/// The callsite's block count is subtracted from the callee's function entry
+/// count.
+static void updateCalleeCount(BlockFrequencyInfo &CallerBFI, BasicBlock *CallBB,
+ Function *Callee) {
+ // If the callee has a original count of N, and the estimated count of
+ // callsite is M, the new callee count is set to N - M. M is estimated from
+ // the caller's entry count, its entry block frequency and the block frequency
+ // of the callsite.
+ Optional<uint64_t> CalleeCount = Callee->getEntryCount();
+ if (!CalleeCount)
+ return;
+ Optional<uint64_t> CallSiteCount = CallerBFI.getBlockProfileCount(CallBB);
+ if (!CallSiteCount)
+ return;
+ // Since CallSiteCount is an estimate, it could exceed the original callee
+ // count and has to be set to 0.
+ if (CallSiteCount.getValue() > CalleeCount.getValue())
+ Callee->setEntryCount(0);
+ else
+ Callee->setEntryCount(CalleeCount.getValue() - CallSiteCount.getValue());
+}
/// This function inlines the called function into the basic block of the
/// caller. This returns false if it is not possible to inline this call.
@@ -1410,8 +1461,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// If IFI has any state in it, zap it before we fill it in.
IFI.reset();
-
- const Function *CalledFunc = CS.getCalledFunction();
+
+ Function *CalledFunc = CS.getCalledFunction();
if (!CalledFunc || // Can't inline external function or indirect
CalledFunc->isDeclaration() || // call, or call to a vararg function!
CalledFunc->getFunctionType()->isVarArg()) return false;
@@ -1578,10 +1629,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
&InlinedFunctionInfo, TheCall);
-
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
+ if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) {
+ // Update the BFI of blocks cloned into the caller.
+ updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
+ CalledFunc->front());
+ // Update the profile count of callee.
+ updateCalleeCount(*IFI.CallerBFI, OrigBB, CalledFunc);
+ }
+
// Inject byval arguments initialization.
for (std::pair<Value*, Value*> &Init : ByValInit)
HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
@@ -2087,6 +2145,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
CalledFunc->getName() + ".exit");
}
+ if (IFI.CallerBFI) {
+ // Copy original BB's block frequency to AfterCallBB
+ IFI.CallerBFI->setBlockFreq(
+ AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency());
+ }
+
// Change the branch that used to go to AfterCallBB to branch to the first
// basic block of the inlined function.
//
OpenPOWER on IntegriCloud