summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp99
1 files changed, 77 insertions, 22 deletions
diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 76358a040d9..2e841b2ac94 100644
--- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -59,11 +59,13 @@
#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -73,6 +75,15 @@ using namespace PatternMatch;
STATISTIC(NumCallSiteSplit, "Number of call-site split");
+/// Only allow instructions before a call, if their CodeSize cost is below
+/// DuplicationThreshold. Those instructions need to be duplicated in all
+/// split blocks.
+static cl::opt<unsigned>
+ DuplicationThreshold("callsite-splitting-duplication-threshold", cl::Hidden,
+ cl::desc("Only allow instructions before a call, if "
+ "their cost is below DuplicationThreshold"),
+ cl::init(5));
+
static void addNonNullAttribute(CallSite CS, Value *Op) {
unsigned ArgNo = 0;
for (auto &I : CS.args()) {
@@ -168,20 +179,26 @@ static SmallVector<BasicBlock *, 2> getTwoPredecessors(BasicBlock *BB) {
return Preds;
}
-static bool canSplitCallSite(CallSite CS) {
+static bool canSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
// FIXME: As of now we handle only CallInst. InvokeInst could be handled
// without too much effort.
Instruction *Instr = CS.getInstruction();
if (!isa<CallInst>(Instr))
return false;
- // Allow splitting a call-site only when there is no instruction before the
- // call-site in the basic block. Based on this constraint, we only clone the
- // call instruction, and we do not move a call-site across any other
- // instruction.
BasicBlock *CallSiteBB = Instr->getParent();
- if (Instr != CallSiteBB->getFirstNonPHIOrDbg())
- return false;
+ // Allow splitting a call-site only when the CodeSize cost of the
+ // instructions before the call is less then DuplicationThreshold. The
+ // instructions before the call will be duplicated in the split blocks and
+ // corresponding uses will be updated.
+ unsigned Cost = 0;
+ for (auto &InstBeforeCall :
+ llvm::make_range(CallSiteBB->begin(), Instr->getIterator())) {
+ Cost += TTI.getInstructionCost(&InstBeforeCall,
+ TargetTransformInfo::TCK_CodeSize);
+ if (Cost >= DuplicationThreshold)
+ return false;
+ }
// Need 2 predecessors and cannot split an edge from an IndirectBrInst.
SmallVector<BasicBlock *, 2> Preds(predecessors(CallSiteBB));
@@ -246,16 +263,21 @@ static void splitCallSite(
CallPN = PHINode::Create(Instr->getType(), Preds.size(), "phi.call");
DEBUG(dbgs() << "split call-site : " << *Instr << " into \n");
- for (const auto &P : Preds) {
- BasicBlock *PredBB = P.first;
- BasicBlock *SplitBlock =
- SplitBlockPredecessors(TailBB, PredBB, ".predBB.split");
+
+ assert(Preds.size() == 2 && "The ValueToValueMaps array has size 2.");
+ // ValueToValueMapTy is neither copy nor moveable, so we use a simple array
+ // here.
+ ValueToValueMapTy ValueToValueMaps[2];
+ for (unsigned i = 0; i < Preds.size(); i++) {
+ BasicBlock *PredBB = Preds[i].first;
+ BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween(
+ TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i]);
assert(SplitBlock && "Unexpected new basic block split.");
- Instruction *NewCI = Instr->clone();
+ Instruction *NewCI =
+ &*std::prev(SplitBlock->getTerminator()->getIterator());
CallSite NewCS(NewCI);
- addConditions(NewCS, P.second);
- NewCI->insertBefore(&*SplitBlock->getFirstInsertionPt());
+ addConditions(NewCS, Preds[i].second);
// Handle PHIs used as arguments in the call-site.
for (PHINode &PN : TailBB->phis()) {
@@ -273,13 +295,41 @@ static void splitCallSite(
CallPN->addIncoming(NewCI, SplitBlock);
}
+ auto *OriginalBegin = &*TailBB->begin();
// Replace users of the original call with a PHI mering call-sites split.
if (CallPN) {
- CallPN->insertBefore(TailBB->getFirstNonPHI());
+ CallPN->insertBefore(OriginalBegin);
Instr->replaceAllUsesWith(CallPN);
}
- Instr->eraseFromParent();
+ // Remove instructions moved to split blocks from TailBB, from the duplicated
+ // call instruction to the beginning of the basic block. If an instruction
+ // has any uses, add a new PHI node to combine the values coming from the
+ // split blocks. The new PHI nodes are placed before the first original
+ // instruction, so we do not end up deleting them. By using reverse-order, we
+ // do not introduce unnecessary PHI nodes for def-use chains from the call
+ // instruction to the beginning of the block.
+ auto I = Instr->getReverseIterator();
+ while (I != TailBB->rend()) {
+ Instruction *CurrentI = &*I++;
+ if (!CurrentI->use_empty()) {
+ // If an existing PHI has users after the call, there is no need to create
+ // a new one.
+ if (isa<PHINode>(CurrentI))
+ continue;
+ PHINode *NewPN = PHINode::Create(CurrentI->getType(), Preds.size());
+ for (auto &Mapping : ValueToValueMaps)
+ NewPN->addIncoming(Mapping[CurrentI],
+ cast<Instruction>(Mapping[CurrentI])->getParent());
+ NewPN->insertBefore(&*TailBB->begin());
+ CurrentI->replaceAllUsesWith(NewPN);
+ }
+ CurrentI->eraseFromParent();
+ // We are done once we handled the first original instruction in TailBB.
+ if (CurrentI == OriginalBegin)
+ break;
+ }
+
NumCallSiteSplit++;
}
@@ -344,14 +394,15 @@ static bool tryToSplitOnPredicatedArgument(CallSite CS) {
return true;
}
-static bool tryToSplitCallSite(CallSite CS) {
- if (!CS.arg_size() || !canSplitCallSite(CS))
+static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
+ if (!CS.arg_size() || !canSplitCallSite(CS, TTI))
return false;
return tryToSplitOnPredicatedArgument(CS) ||
tryToSplitOnPHIPredicatedArgument(CS);
}
-static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) {
+static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
+ TargetTransformInfo &TTI) {
bool Changed = false;
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) {
BasicBlock &BB = *BI++;
@@ -364,7 +415,7 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) {
Function *Callee = CS.getCalledFunction();
if (!Callee || Callee->isDeclaration())
continue;
- Changed |= tryToSplitCallSite(CS);
+ Changed |= tryToSplitCallSite(CS, TTI);
}
}
return Changed;
@@ -379,6 +430,7 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
@@ -387,7 +439,8 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
return false;
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- return doCallSiteSplitting(F, TLI);
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return doCallSiteSplitting(F, TLI, TTI);
}
};
} // namespace
@@ -396,6 +449,7 @@ char CallSiteSplittingLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
FunctionPass *llvm::createCallSiteSplittingPass() {
@@ -405,8 +459,9 @@ FunctionPass *llvm::createCallSiteSplittingPass() {
PreservedAnalyses CallSiteSplittingPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- if (!doCallSiteSplitting(F, TLI))
+ if (!doCallSiteSplitting(F, TLI, TTI))
return PreservedAnalyses::all();
PreservedAnalyses PA;
return PA;
OpenPOWER on IntegriCloud