summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorEaswaran Raman <eraman@google.com>2016-03-03 18:26:33 +0000
committerEaswaran Raman <eraman@google.com>2016-03-03 18:26:33 +0000
commit3035719c86812d83e0bf9320ba5153a219f4635c (patch)
treedfe3444c2a1d041a5841fa8642cb5d76f76b4166 /llvm/lib
parentabcee45b7ad3a07359ac92cc2954f4ef489367ae (diff)
downloadbcm5719-llvm-3035719c86812d83e0bf9320ba5153a219f4635c.tar.gz
bcm5719-llvm-3035719c86812d83e0bf9320ba5153a219f4635c.zip
Infrastructure for PGO enhancements in inliner
This patch provides the following infrastructure for PGO enhancements in inliner: Enable the use of block level profile information in inliner Incremental update of block frequency information during inlining Update the function entry counts of callees when they get inlined into callers. Differential Revision: http://reviews.llvm.org/D16381 llvm-svn: 262636
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Analysis/InlineCost.cpp99
-rw-r--r--llvm/lib/Transforms/IPO/InlineSimple.cpp3
-rw-r--r--llvm/lib/Transforms/IPO/Inliner.cpp118
-rw-r--r--llvm/lib/Transforms/Utils/CloneFunction.cpp42
-rw-r--r--llvm/lib/Transforms/Utils/InlineFunction.cpp4
5 files changed, 221 insertions, 45 deletions
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index b4138b2909c..08c04d598ef 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -18,13 +18,18 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
@@ -85,6 +90,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
// easily cacheable. Instead, use the cover function paramHasAttr.
CallSite CandidateCS;
+ BlockFrequencyAnalysis *BFA;
int Threshold;
int Cost;
@@ -153,6 +159,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// passed to support analyzing indirect calls whose target is inferred by
/// analysis.
void updateThreshold(CallSite CS, Function &Callee);
+ /// Adjust Threshold based on CallSiteCount and return the adjusted threshold.
+ int getAdjustedThreshold(int Threshold, Optional<uint64_t> CallSiteCount);
// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
@@ -194,17 +202,19 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
public:
CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
- Function &Callee, int Threshold, CallSite CSArg)
- : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), Threshold(Threshold),
- Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
- ExposesReturnsTwice(false), HasDynamicAlloca(false),
- ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
- HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
- NumVectorInstructions(0), FiftyPercentVectorBonus(0),
- TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
- NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
- NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
- SROACostSavings(0), SROACostSavingsLost(0) {}
+ Function &Callee, int Threshold, CallSite CSArg,
+ BlockFrequencyAnalysis *BFA)
+ : TTI(TTI), ACT(ACT), F(Callee), CandidateCS(CSArg), BFA(BFA),
+ Threshold(Threshold), Cost(0), IsCallerRecursive(false),
+ IsRecursiveCall(false), ExposesReturnsTwice(false),
+ HasDynamicAlloca(false), ContainsNoDuplicateCall(false),
+ HasReturn(false), HasIndirectBr(false), HasFrameEscape(false),
+ AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0),
+ SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -572,6 +582,15 @@ bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
return false;
}
+// Adjust the threshold based on callsite hotness. Currently this is a nop.
+int CallAnalyzer::getAdjustedThreshold(int Threshold,
+ Optional<uint64_t> CallSiteCount
+ __attribute__((unused))) {
+ // FIXME: The new threshold should be computed from the given Threshold and
+ // the callsite hotness.
+ return Threshold;
+}
+
void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
// If -inline-threshold is not given, listen to the optsize and minsize
// attributes when they would decrease the threshold.
@@ -596,6 +615,9 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
FunctionCount = Callee.getEntryCount().getValue();
MaxFunctionCount = Callee.getParent()->getMaximumFunctionCount().getValue();
}
+ Optional<uint64_t> CallSiteCount =
+ llvm::getBlockCount(CS.getInstruction()->getParent(), BFA);
+ Threshold = getAdjustedThreshold(Threshold, CallSiteCount);
// Listen to the inlinehint attribute or profile based hotness information
// when it would increase the threshold and the caller does not need to
@@ -912,7 +934,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);
+ CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS,
+ BFA);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
@@ -1433,9 +1456,10 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT) {
+ AssumptionCacheTracker *ACT,
+ BlockFrequencyAnalysis *BFA) {
return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
- ACT);
+ ACT, BFA);
}
int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
@@ -1454,7 +1478,8 @@ int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }
InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
int DefaultThreshold,
TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT) {
+ AssumptionCacheTracker *ACT,
+ BlockFrequencyAnalysis *BFA) {
// Cannot inline indirect calls.
if (!Callee)
@@ -1487,7 +1512,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS);
+ CallAnalyzer CA(CalleeTTI, ACT, *Callee, DefaultThreshold, CS, BFA);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
@@ -1535,3 +1560,45 @@ bool llvm::isInlineViable(Function &F) {
return true;
}
+
+/// \brief Get estimated execution count for \p BB.
+Optional<uint64_t> llvm::getBlockCount(BasicBlock *BB,
+ BlockFrequencyAnalysis *BFA) {
+ if (!BFA)
+ return None;
+ Function *F = BB->getParent();
+ Optional<uint64_t> EntryCount = F->getEntryCount();
+ if (!EntryCount)
+ return None;
+ BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(F);
+ uint64_t BBFreq = BFI->getBlockFreq(BB).getFrequency();
+ uint64_t FunctionEntryFreq = BFI->getEntryFreq();
+ uint64_t BBCount = EntryCount.getValue() * BBFreq / FunctionEntryFreq;
+ return BBCount;
+}
+
+BlockFrequencyAnalysis::~BlockFrequencyAnalysis() {
+ for (auto &Entry : BFM) {
+ delete Entry.second;
+ }
+}
+
+/// \brief Get BlockFrequencyInfo for a function.
+BlockFrequencyInfo *BlockFrequencyAnalysis::getBlockFrequencyInfo(Function *F) {
+ auto Iter = BFM.find(F);
+ if (Iter != BFM.end())
+ return Iter->second;
+ // We need to create a BlockFrequencyInfo object for F and store it.
+ DominatorTree DT;
+ DT.recalculate(*F);
+ LoopInfo LI(DT);
+ BranchProbabilityInfo BPI(*F, LI);
+ BlockFrequencyInfo *BFI = new BlockFrequencyInfo(*F, BPI, LI);
+ BFM[F] = BFI;
+ return BFI;
+}
+
+/// \brief Invalidate BlockFrequencyInfo for a function.
+void BlockFrequencyAnalysis::invalidateBlockFrequencyInfo(Function *F) {
+ BFM.erase(F);
+}
diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp
index a87c0d396db..cc37c9745db 100644
--- a/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -59,7 +59,8 @@ public:
InlineCost getInlineCost(CallSite CS) override {
Function *Callee = CS.getCalledFunction();
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
- return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT);
+ return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT,
+ HasProfileData ? BFA.get() : nullptr);
}
bool runOnSCC(CallGraphSCC &SCC) override;
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 568707dddf2..8ed4bda54ad 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -47,10 +48,13 @@ STATISTIC(NumMergedAllocas, "Number of allocas merged together");
// if those would be more profitable and blocked inline steps.
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
-Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
+Inliner::Inliner(char &ID)
+ : CallGraphSCCPass(ID), InsertLifetime(true),
+ BFA(new BlockFrequencyAnalysis()) {}
Inliner::Inliner(char &ID, bool InsertLifetime)
- : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
+ : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime),
+ BFA(new BlockFrequencyAnalysis()) {}
/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
@@ -259,7 +263,7 @@ bool Inliner::shouldInline(CallSite CS) {
Twine(IC.getCostDelta() + IC.getCost()) + ")");
return false;
}
-
+
// Try to detect the case where the current inlining candidate caller (call
// it B) is a static or linkonce-ODR function and is an inlining candidate
// elsewhere, and the current candidate callee (call it C) is large enough
@@ -356,8 +360,90 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
return false;
}
+/// \brief Update the frequency of a block that is cloned into the caller.
+/// This is invoked when \p OrigBB from the callee is cloned into \p NewBB in
+/// the caller.
+void Inliner::updateBlockFreq(CallSite &CS, const BasicBlock *OrigBB,
+ const BasicBlock *NewBB) {
+ if (!HasProfileData)
+ return;
+ Instruction *Call = CS.getInstruction();
+ BasicBlock *CallBB = Call->getParent();
+ BlockFrequencyInfo *CalleeBFI =
+ BFA->getBlockFrequencyInfo(CS.getCalledFunction());
+ BlockFrequencyInfo *CallerBFI =
+ BFA->getBlockFrequencyInfo(CallBB->getParent());
+ // Find the number of times OrigBB is executed per invocation of the callee
+ // and multiply by the number of times callee is executed in the caller.
+ // Freq(NewBB) = Freq(OrigBB) * CallSiteFreq / CalleeEntryFreq.
+ uint64_t CallSiteFreq = CallerBFI->getBlockFreq(CallBB).getFrequency();
+ uint64_t CalleeEntryFreq = CalleeBFI->getEntryFreq();
+ // Frequency of OrigBB in the callee.
+ BlockFrequency OrigBBFreq = CalleeBFI->getBlockFreq(OrigBB);
+ CallerBFI->setBlockFreq(NewBB, (double)(OrigBBFreq.getFrequency()) /
+ CalleeEntryFreq * CallSiteFreq);
+}
+
+/// \brief Update entry count of \p Callee after it got inlined at a callsite
+/// in block \p CallBB.
+void Inliner::updateEntryCount(BasicBlock *CallBB, Function *Callee) {
+ if (!HasProfileData)
+ return;
+ // If the callee has a original count of N, and the estimated count of
+ // callsite is M, the new callee count is set to N - M. M is estimated from
+ // the caller's entry count, its entry block frequency and the block frequency
+ // of the callsite.
+ Optional<uint64_t> CalleeCount = Callee->getEntryCount();
+ if (!CalleeCount)
+ return;
+ Optional<uint64_t> CallSiteCount = llvm::getBlockCount(CallBB, BFA.get());
+ if (!CallSiteCount)
+ return;
+ // Since CallSiteCount is an estimate, it could exceed the original callee
+ // count and has to be set to 0.
+ if (CallSiteCount.getValue() > CalleeCount.getValue()) {
+ Callee->setEntryCount(0);
+ DEBUG(llvm::dbgs() << "Estimated count of block " << CallBB->getName()
+ << " is " << CallSiteCount.getValue()
+ << " which exceeds the entry count "
+ << CalleeCount.getValue() << " of the callee "
+ << Callee->getName() << "\n");
+ } else
+ Callee->setEntryCount(CalleeCount.getValue() - CallSiteCount.getValue());
+}
+
+void Inliner::invalidateBFI(Function *F) {
+ if (!HasProfileData)
+ return;
+ if (F)
+ BFA->invalidateBlockFrequencyInfo(F);
+}
+void Inliner::invalidateBFI(CallGraphSCC &SCC) {
+ if (!HasProfileData)
+ return;
+ for (CallGraphNode *Node : SCC) {
+ Function *F = Node->getFunction();
+ invalidateBFI(F);
+ }
+}
+void Inliner::copyBlockFrequency(BasicBlock *Src, BasicBlock *Dst) {
+ if (!HasProfileData)
+ return;
+ Function *F = Src->getParent();
+ BlockFrequencyInfo *BFI = BFA->getBlockFrequencyInfo(F);
+ BFI->setBlockFreq(Dst, BFI->getBlockFreq(Src).getFrequency());
+}
+
+static bool hasProfileData(Module &M) {
+ // We check for the presence of MaxFunctionCount in the module.
+ // FIXME: This now only works for frontend based instrumentation.
+ return M.getMaximumFunctionCount().hasValue();
+}
+
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
+ using namespace std::placeholders;
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ HasProfileData = hasProfileData(CG.getModule());
ACT = &getAnalysis<AssumptionCacheTracker>();
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
@@ -419,7 +505,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, ACT);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -448,6 +533,10 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CS.getInstruction()->eraseFromParent();
++NumCallsDeleted;
} else {
+ Instruction *TheCall = CS.getInstruction();
+ BasicBlock *CallSiteBlock = TheCall->getParent();
+ Instruction *CallSuccessor = &*(++BasicBlock::iterator(TheCall));
+
// We can only inline direct calls to non-declarations.
if (!Callee || Callee->isDeclaration()) continue;
@@ -476,6 +565,11 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
continue;
}
+ BlockCloningFunctor BCF = nullptr;
+ if (HasProfileData)
+ BCF = std::bind(&Inliner::updateBlockFreq, this, CS, _1, _2);
+ InlineFunctionInfo InlineInfo(&CG, ACT, BCF);
+
// Attempt to inline the function.
if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,
InlineHistoryID, InsertLifetime)) {
@@ -485,6 +579,13 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
Caller->getName()));
continue;
}
+ updateEntryCount(CallSiteBlock, Callee);
+ // The instruction following the call is part of a new basic block
+ // created during the inlining process. This does not have an entry in
+ // the BFI. We create an entry by copying the frequency of the original
+ // block containing the call.
+ copyBlockFrequency(CallSiteBlock, CallSuccessor->getParent());
+
++NumInlined;
// Report the inline decision.
@@ -523,7 +624,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CalleeNode->removeAllCalledFunctions();
// Removing the node for callee from the call graph and delete it.
- delete CG.removeFunctionFromModule(CalleeNode);
+ Function *F = CG.removeFunctionFromModule(CalleeNode);
+ invalidateBFI(F);
+ delete F;
++NumDeleted;
}
@@ -544,6 +647,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
}
} while (LocalChange);
+ invalidateBFI(SCC);
return Changed;
}
@@ -651,7 +755,9 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
FunctionsToRemove.end()),
FunctionsToRemove.end());
for (CallGraphNode *CGN : FunctionsToRemove) {
- delete CG.removeFunctionFromModule(CGN);
+ Function *F = CG.removeFunctionFromModule(CGN);
+ invalidateBFI(F);
+ delete F;
++NumDeleted;
}
return true;
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 05b0a17d9b8..ac0867a9ca5 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -277,9 +277,10 @@ namespace {
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
- void CloneBlock(const BasicBlock *BB,
+ void CloneBlock(const BasicBlock *BB,
BasicBlock::const_iterator StartingInst,
- std::vector<const BasicBlock*> &ToClone);
+ std::vector<const BasicBlock *> &ToClone,
+ BlockCloningFunctor Ftor = nullptr);
};
}
@@ -287,7 +288,8 @@ namespace {
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
BasicBlock::const_iterator StartingInst,
- std::vector<const BasicBlock*> &ToClone){
+ std::vector<const BasicBlock *> &ToClone,
+ BlockCloningFunctor Ftor) {
WeakVH &BBEntry = VMap[BB];
// Have we already cloned this block?
@@ -424,18 +426,19 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
BB != &BB->getParent()->front();
}
+ // Call Ftor to tell BB has been cloned to NewBB
+ if (Ftor)
+ Ftor(BB, NewBB);
}
/// This works like CloneAndPruneFunctionInto, except that it does not clone the
/// entire function. Instead it starts at an instruction provided by the caller
/// and copies (and prunes) only the code reachable from that instruction.
-void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
- const Instruction *StartingInst,
- ValueToValueMapTy &VMap,
- bool ModuleLevelChanges,
- SmallVectorImpl<ReturnInst *> &Returns,
- const char *NameSuffix,
- ClonedCodeInfo *CodeInfo) {
+void llvm::CloneAndPruneIntoFromInst(
+ Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst,
+ ValueToValueMapTy &VMap, bool ModuleLevelChanges,
+ SmallVectorImpl<ReturnInst *> &Returns, const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo, BlockCloningFunctor Ftor) {
assert(NameSuffix && "NameSuffix cannot be null!");
ValueMapTypeRemapper *TypeMapper = nullptr;
@@ -461,11 +464,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
- PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
+ PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist, Ftor);
while (!CloneWorklist.empty()) {
const BasicBlock *BB = CloneWorklist.back();
CloneWorklist.pop_back();
- PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
+ PFC.CloneBlock(BB, BB->begin(), CloneWorklist, Ftor);
}
// Loop over all of the basic blocks in the old function. If the block was
@@ -667,15 +670,14 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
/// constant arguments cause a significant amount of code in the callee to be
/// dead. Since this doesn't produce an exact copy of the input, it can't be
/// used for things like CloneFunction or CloneModule.
-void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
- ValueToValueMapTy &VMap,
- bool ModuleLevelChanges,
- SmallVectorImpl<ReturnInst*> &Returns,
- const char *NameSuffix,
- ClonedCodeInfo *CodeInfo,
- Instruction *TheCall) {
+void llvm::CloneAndPruneFunctionInto(
+ Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges, SmallVectorImpl<ReturnInst *> &Returns,
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo, Instruction *TheCall,
+ BlockCloningFunctor Ftor) {
CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
- ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
+ ModuleLevelChanges, Returns, NameSuffix, CodeInfo,
+ Ftor);
}
/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 491b18e6fc7..923e5b231c4 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1319,7 +1319,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// If IFI has any state in it, zap it before we fill it in.
IFI.reset();
-
+
const Function *CalledFunc = CS.getCalledFunction();
if (!CalledFunc || // Can't inline external function or indirect
CalledFunc->isDeclaration() || // call, or call to a vararg function!
@@ -1486,7 +1486,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// happy with whatever the cloner can do.
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
/*ModuleLevelChanges=*/false, Returns, ".i",
- &InlinedFunctionInfo, TheCall);
+ &InlinedFunctionInfo, TheCall, IFI.Ftor);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
OpenPOWER on IntegriCloud