diff options
author | Florian Hahn <florian.hahn@arm.com> | 2018-02-14 13:59:12 +0000 |
---|---|---|
committer | Florian Hahn <florian.hahn@arm.com> | 2018-02-14 13:59:12 +0000 |
commit | b4e3bad89bc0922133f29064c87bb59a3935af39 (patch) | |
tree | f7ca9c86105b911671fcedcb71851e79b9374668 /llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp | |
parent | de300e66bbff85c70a9e69d536f10488c899ef9f (diff) | |
download | bcm5719-llvm-b4e3bad89bc0922133f29064c87bb59a3935af39.tar.gz bcm5719-llvm-b4e3bad89bc0922133f29064c87bb59a3935af39.zip |
Recommit r325001: [CallSiteSplitting] Support splitting of blocks with instrs before call.
For basic blocks with instructions between the beginning of the block
and a call we have to duplicate the instructions before the call in all
split blocks and add PHI nodes for uses of the duplicated instructions
after the call.
Currently, the threshold for the number of instructions before a call
is quite low, to keep the impact on binary size low.
Reviewers: junbuml, mcrosier, davidxl, davide
Reviewed By: junbuml
Differential Revision: https://reviews.llvm.org/D41860
llvm-svn: 325126
Diffstat (limited to 'llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp | 99 |
1 files changed, 77 insertions, 22 deletions
diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp index 76358a040d9..2e841b2ac94 100644 --- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp +++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp @@ -59,11 +59,13 @@ #include "llvm/Transforms/Scalar/CallSiteSplitting.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -73,6 +75,15 @@ using namespace PatternMatch; STATISTIC(NumCallSiteSplit, "Number of call-site split"); +/// Only allow instructions before a call, if their CodeSize cost is below +/// DuplicationThreshold. Those instructions need to be duplicated in all +/// split blocks. +static cl::opt<unsigned> + DuplicationThreshold("callsite-splitting-duplication-threshold", cl::Hidden, + cl::desc("Only allow instructions before a call, if " + "their cost is below DuplicationThreshold"), + cl::init(5)); + static void addNonNullAttribute(CallSite CS, Value *Op) { unsigned ArgNo = 0; for (auto &I : CS.args()) { @@ -168,20 +179,26 @@ static SmallVector<BasicBlock *, 2> getTwoPredecessors(BasicBlock *BB) { return Preds; } -static bool canSplitCallSite(CallSite CS) { +static bool canSplitCallSite(CallSite CS, TargetTransformInfo &TTI) { // FIXME: As of now we handle only CallInst. InvokeInst could be handled // without too much effort. Instruction *Instr = CS.getInstruction(); if (!isa<CallInst>(Instr)) return false; - // Allow splitting a call-site only when there is no instruction before the - // call-site in the basic block. Based on this constraint, we only clone the - // call instruction, and we do not move a call-site across any other - // instruction. BasicBlock *CallSiteBB = Instr->getParent(); - if (Instr != CallSiteBB->getFirstNonPHIOrDbg()) - return false; + // Allow splitting a call-site only when the CodeSize cost of the + // instructions before the call is less then DuplicationThreshold. The + // instructions before the call will be duplicated in the split blocks and + // corresponding uses will be updated. + unsigned Cost = 0; + for (auto &InstBeforeCall : + llvm::make_range(CallSiteBB->begin(), Instr->getIterator())) { + Cost += TTI.getInstructionCost(&InstBeforeCall, + TargetTransformInfo::TCK_CodeSize); + if (Cost >= DuplicationThreshold) + return false; + } // Need 2 predecessors and cannot split an edge from an IndirectBrInst. SmallVector<BasicBlock *, 2> Preds(predecessors(CallSiteBB)); @@ -246,16 +263,21 @@ static void splitCallSite( CallPN = PHINode::Create(Instr->getType(), Preds.size(), "phi.call"); DEBUG(dbgs() << "split call-site : " << *Instr << " into \n"); - for (const auto &P : Preds) { - BasicBlock *PredBB = P.first; - BasicBlock *SplitBlock = - SplitBlockPredecessors(TailBB, PredBB, ".predBB.split"); + + assert(Preds.size() == 2 && "The ValueToValueMaps array has size 2."); + // ValueToValueMapTy is neither copy nor moveable, so we use a simple array + // here. + ValueToValueMapTy ValueToValueMaps[2]; + for (unsigned i = 0; i < Preds.size(); i++) { + BasicBlock *PredBB = Preds[i].first; + BasicBlock *SplitBlock = DuplicateInstructionsInSplitBetween( + TailBB, PredBB, &*std::next(Instr->getIterator()), ValueToValueMaps[i]); assert(SplitBlock && "Unexpected new basic block split."); - Instruction *NewCI = Instr->clone(); + Instruction *NewCI = + &*std::prev(SplitBlock->getTerminator()->getIterator()); CallSite NewCS(NewCI); - addConditions(NewCS, P.second); - NewCI->insertBefore(&*SplitBlock->getFirstInsertionPt()); + addConditions(NewCS, Preds[i].second); // Handle PHIs used as arguments in the call-site. for (PHINode &PN : TailBB->phis()) { @@ -273,13 +295,41 @@ static void splitCallSite( CallPN->addIncoming(NewCI, SplitBlock); } + auto *OriginalBegin = &*TailBB->begin(); // Replace users of the original call with a PHI mering call-sites split. if (CallPN) { - CallPN->insertBefore(TailBB->getFirstNonPHI()); + CallPN->insertBefore(OriginalBegin); Instr->replaceAllUsesWith(CallPN); } - Instr->eraseFromParent(); + // Remove instructions moved to split blocks from TailBB, from the duplicated + // call instruction to the beginning of the basic block. If an instruction + // has any uses, add a new PHI node to combine the values coming from the + // split blocks. The new PHI nodes are placed before the first original + // instruction, so we do not end up deleting them. By using reverse-order, we + // do not introduce unnecessary PHI nodes for def-use chains from the call + // instruction to the beginning of the block. + auto I = Instr->getReverseIterator(); + while (I != TailBB->rend()) { + Instruction *CurrentI = &*I++; + if (!CurrentI->use_empty()) { + // If an existing PHI has users after the call, there is no need to create + // a new one. + if (isa<PHINode>(CurrentI)) + continue; + PHINode *NewPN = PHINode::Create(CurrentI->getType(), Preds.size()); + for (auto &Mapping : ValueToValueMaps) + NewPN->addIncoming(Mapping[CurrentI], + cast<Instruction>(Mapping[CurrentI])->getParent()); + NewPN->insertBefore(&*TailBB->begin()); + CurrentI->replaceAllUsesWith(NewPN); + } + CurrentI->eraseFromParent(); + // We are done once we handled the first original instruction in TailBB. + if (CurrentI == OriginalBegin) + break; + } + NumCallSiteSplit++; } @@ -344,14 +394,15 @@ static bool tryToSplitOnPredicatedArgument(CallSite CS) { return true; } -static bool tryToSplitCallSite(CallSite CS) { - if (!CS.arg_size() || !canSplitCallSite(CS)) +static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI) { + if (!CS.arg_size() || !canSplitCallSite(CS, TTI)) return false; return tryToSplitOnPredicatedArgument(CS) || tryToSplitOnPHIPredicatedArgument(CS); } -static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) { +static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI, + TargetTransformInfo &TTI) { bool Changed = false; for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) { BasicBlock &BB = *BI++; @@ -364,7 +415,7 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI) { Function *Callee = CS.getCalledFunction(); if (!Callee || Callee->isDeclaration()) continue; - Changed |= tryToSplitCallSite(CS); + Changed |= tryToSplitCallSite(CS, TTI); } } return Changed; @@ -379,6 +430,7 @@ struct CallSiteSplittingLegacyPass : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); FunctionPass::getAnalysisUsage(AU); } @@ -387,7 +439,8 @@ struct CallSiteSplittingLegacyPass : public FunctionPass { return false; auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); - return doCallSiteSplitting(F, TLI); + auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + return doCallSiteSplitting(F, TLI, TTI); } }; } // namespace @@ -396,6 +449,7 @@ char CallSiteSplittingLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting", "Call-site splitting", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting", "Call-site splitting", false, false) FunctionPass *llvm::createCallSiteSplittingPass() { @@ -405,8 +459,9 @@ FunctionPass *llvm::createCallSiteSplittingPass() { PreservedAnalyses CallSiteSplittingPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); + auto &TTI = AM.getResult<TargetIRAnalysis>(F); - if (!doCallSiteSplitting(F, TLI)) + if (!doCallSiteSplitting(F, TLI, TTI)) return PreservedAnalyses::all(); PreservedAnalyses PA; return PA; |