-rw-r--r--  llvm/lib/Analysis/InlineCost.cpp                     | 175
-rw-r--r--  llvm/test/Transforms/Inline/inline-indirect-chain.ll |  55
2 files changed, 84 insertions, 146 deletions
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 8c68dd4d390..1ba03de6989 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -51,7 +51,7 @@ static cl::opt<int> InlineThreshold(
     cl::desc("Control the amount of inlining to perform (default = 225)"));
 
 static cl::opt<int> HintThreshold(
-    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
+    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
     cl::desc("Threshold for inlining functions with inline hint"));
 
 static cl::opt<int>
@@ -63,7 +63,7 @@ static cl::opt<int>
 // PGO before we actually hook up inliner with analysis passes such as BPI and
 // BFI.
 static cl::opt<int> ColdThreshold(
-    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
+    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
     cl::desc("Threshold for inlining functions with cold attribute"));
 
 static cl::opt<int>
@@ -149,9 +149,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   bool HasUninlineableIntrinsic = false;
   bool InitsVargArgs = false;
 
-  /// Attempt to evaluate indirect calls to boost its inline cost.
-  bool BoostIndirectCalls;
-
   /// Number of bytes allocated statically by the callee.
   uint64_t AllocatedSize = 0;
   unsigned NumInstructions = 0;
@@ -298,14 +295,13 @@ public:
       std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
       Optional<function_ref<BlockFrequencyInfo &(Function &)>> &GetBFI,
       ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
-      Function &Callee, CallBase &Call, const InlineParams &Params,
-      bool BoostIndirect = true)
+      Function &Callee, CallBase &Call, const InlineParams &Params)
       : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
         PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
         CandidateCall(Call), Params(Params), Threshold(Params.DefaultThreshold),
         ComputeFullInlineCost(OptComputeFullInlineCost ||
                               Params.ComputeFullInlineCost || ORE),
-        BoostIndirectCalls(BoostIndirect), EnableLoadElimination(true) {}
+        EnableLoadElimination(true) {}
 
   InlineResult analyzeCall(CallBase &Call);
 
@@ -427,9 +423,9 @@ bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
   Operands.push_back(GEP.getOperand(0));
   for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
     if (Constant *SimpleOp = SimplifiedValues.lookup(*I))
-      Operands.push_back(SimpleOp);
-    else
-      Operands.push_back(*I);
+      Operands.push_back(SimpleOp);
+    else
+      Operands.push_back(*I);
   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&GEP, Operands);
 }
 
@@ -1243,100 +1239,97 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
   if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
     ContainsNoDuplicateCall = true;
 
-  Value *Callee = Call.getCalledOperand();
-  Function *F = dyn_cast_or_null<Function>(Callee);
-  bool IsIndirectCall = !F;
-  if (IsIndirectCall) {
-    // Check if this happens to be an indirect function call to a known function
-    // in this inline context. If not, we've done all we can.
-    F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
-    if (!F) {
-      // Pay the price of the argument setup. We account for the average 1
-      // instruction per call argument setup here.
-      addCost(Call.arg_size() * InlineConstants::InstrCost);
+  if (Function *F = Call.getCalledFunction()) {
+    // When we have a concrete function, first try to simplify it directly.
+    if (simplifyCallSite(F, Call))
+      return true;
 
-      // Everything other than inline ASM will also have a significant cost
-      // merely from making the call.
-      if (!isa<InlineAsm>(Callee))
-        addCost(InlineConstants::CallPenalty);
+    // Next check if it is an intrinsic we know about.
+    // FIXME: Lift this into part of the InstVisitor.
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
+      switch (II->getIntrinsicID()) {
+      default:
+        if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
+          disableLoadElimination();
+        return Base::visitCallBase(Call);
+
+      case Intrinsic::load_relative:
+        // This is normally lowered to 4 LLVM instructions.
+        addCost(3 * InlineConstants::InstrCost);
+        return false;
 
-      if (!Call.onlyReadsMemory())
+      case Intrinsic::memset:
+      case Intrinsic::memcpy:
+      case Intrinsic::memmove:
         disableLoadElimination();
-      return Base::visitCallBase(Call);
+        // SROA can usually chew through these intrinsics, but they aren't free.
+        return false;
+      case Intrinsic::icall_branch_funnel:
+      case Intrinsic::localescape:
+        HasUninlineableIntrinsic = true;
+        return false;
+      case Intrinsic::vastart:
+        InitsVargArgs = true;
+        return false;
+      }
     }
-  }
 
-  assert(F && "Expected a call to a known function");
-
-  // When we have a concrete function, first try to simplify it directly.
-  if (simplifyCallSite(F, Call))
-    return true;
+    if (F == Call.getFunction()) {
+      // This flag will fully abort the analysis, so don't bother with anything
+      // else.
+      IsRecursiveCall = true;
+      return false;
+    }
 
-  // Next check if it is an intrinsic we know about.
-  // FIXME: Lift this into part of the InstVisitor.
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
-    switch (II->getIntrinsicID()) {
-    default:
-      if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
-        disableLoadElimination();
-      return Base::visitCallBase(Call);
+    if (TTI.isLoweredToCall(F)) {
+      // We account for the average 1 instruction per call argument setup
+      // here.
+      addCost(Call.arg_size() * InlineConstants::InstrCost);
 
-    case Intrinsic::load_relative:
-      // This is normally lowered to 4 LLVM instructions.
-      addCost(3 * InlineConstants::InstrCost);
-      return false;
+      // Everything other than inline ASM will also have a significant cost
+      // merely from making the call.
+      if (!isa<InlineAsm>(Call.getCalledValue()))
+        addCost(InlineConstants::CallPenalty);
+    }
 
-    case Intrinsic::memset:
-    case Intrinsic::memcpy:
-    case Intrinsic::memmove:
+    if (!Call.onlyReadsMemory())
       disableLoadElimination();
-      // SROA can usually chew through these intrinsics, but they aren't free.
-      return false;
-    case Intrinsic::icall_branch_funnel:
-    case Intrinsic::localescape:
-      HasUninlineableIntrinsic = true;
-      return false;
-    case Intrinsic::vastart:
-      InitsVargArgs = true;
-      return false;
-    }
+    return Base::visitCallBase(Call);
   }
 
-  if (F == Call.getFunction()) {
-    // This flag will fully abort the analysis, so don't bother with anything
-    // else.
-    IsRecursiveCall = true;
-    return false;
+  // Otherwise we're in a very special case -- an indirect function call. See
+  // if we can be particularly clever about this.
+  Value *Callee = Call.getCalledValue();
+
+  // First, pay the price of the argument setup. We account for the average
+  // 1 instruction per call argument setup here.
+  addCost(Call.arg_size() * InlineConstants::InstrCost);
+
+  // Next, check if this happens to be an indirect function call to a known
+  // function in this inline context. If not, we've done all we can.
+  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
+  if (!F) {
+    if (!Call.onlyReadsMemory())
+      disableLoadElimination();
+    return Base::visitCallBase(Call);
  }
 
-  if (TTI.isLoweredToCall(F)) {
-    // We account for the average 1 instruction per call argument setup here.
-    addCost(Call.arg_size() * InlineConstants::InstrCost);
-
-    // If we have a constant that we are calling as a function, we can peer
-    // through it and see the function target. This happens not infrequently
-    // during devirtualization and so we want to give it a hefty bonus for
-    // inlining, but cap that bonus in the event that inlining wouldn't pan out.
-    // Pretend to inline the function, with a custom threshold.
-    if (IsIndirectCall && BoostIndirectCalls) {
-      auto IndirectCallParams = Params;
-      IndirectCallParams.DefaultThreshold =
-          InlineConstants::IndirectCallThreshold;
-      CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
-                      IndirectCallParams, false);
-      if (CA.analyzeCall(Call)) {
-        // We were able to inline the indirect call! Subtract the cost from the
-        // threshold to get the bonus we want to apply, but don't go below zero.
-        Cost -= std::max(0, CA.getThreshold() - CA.getCost());
-      } else
-        // Otherwise simply add the cost for merely making the call.
-        addCost(InlineConstants::CallPenalty);
-    } else
-      // Otherwise simply add the cost for merely making the call.
-      addCost(InlineConstants::CallPenalty);
+  // If we have a constant that we are calling as a function, we can peer
+  // through it and see the function target. This happens not infrequently
+  // during devirtualization and so we want to give it a hefty bonus for
+  // inlining, but cap that bonus in the event that inlining wouldn't pan
+  // out. Pretend to inline the function, with a custom threshold.
+  auto IndirectCallParams = Params;
+  IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
+  CallAnalyzer CA(TTI, GetAssumptionCache, GetBFI, PSI, ORE, *F, Call,
+                  IndirectCallParams);
+  if (CA.analyzeCall(Call)) {
+    // We were able to inline the indirect call! Subtract the cost from the
+    // threshold to get the bonus we want to apply, but don't go below zero.
+    Cost -= std::max(0, CA.getThreshold() - CA.getCost());
   }
 
-  if (!Call.onlyReadsMemory() || (IsIndirectCall && !F->onlyReadsMemory()))
+  if (!F->onlyReadsMemory())
     disableLoadElimination();
   return Base::visitCallBase(Call);
 }
@@ -1501,7 +1494,7 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
 
   int64_t ExpectedNumberOfCompare = 3 * (int64_t)NumCaseCluster / 2 - 1;
   int64_t SwitchCost =
-      ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
+      ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
   addCost(SwitchCost, (int64_t)CostUpperBound);
 
   return false;
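The heart of the restored logic above is the capped devirtualization bonus: a nested CallAnalyzer pretends to inline the callee discovered behind the indirect call, using InlineConstants::IndirectCallThreshold in place of the normal threshold, and whatever headroom the nested run leaves (threshold minus nested cost, clamped at zero) is credited against the caller's running cost. A minimal standalone C++ sketch of just that arithmetic; the threshold value of 100 and the helper name are illustrative assumptions, not taken from this patch:

#include <algorithm>
#include <cassert>

// Hypothetical stand-in for InlineConstants::IndirectCallThreshold; the
// exact value is an assumption made for this illustration.
constexpr int IndirectCallThreshold = 100;

// Mirrors the bonus math in visitCallBase: when the nested analysis of the
// discovered callee succeeds, subtract the unused threshold headroom from
// the caller's running cost, clamped at zero so a callee that barely fits
// (or does not fit) can never make the call site look worse.
int applyIndirectCallBonus(int Cost, bool NestedRunSucceeded,
                           int NestedThreshold, int NestedCost) {
  if (!NestedRunSucceeded)
    return Cost; // no bonus; the argument-setup cost was already paid
  return Cost - std::max(0, NestedThreshold - NestedCost);
}

int main() {
  // A cheap discovered target (cost 30 vs. threshold 100) earns a 70 bonus.
  assert(applyIndirectCallBonus(200, true, IndirectCallThreshold, 30) == 130);
  // A nested cost at or above the threshold is clamped to a zero bonus.
  assert(applyIndirectCallBonus(200, true, IndirectCallThreshold, 150) == 200);
  return 0;
}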
diff --git a/llvm/test/Transforms/Inline/inline-indirect-chain.ll b/llvm/test/Transforms/Inline/inline-indirect-chain.ll
deleted file mode 100644
index bf73ad35dad..00000000000
--- a/llvm/test/Transforms/Inline/inline-indirect-chain.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: opt -inline -early-cse < %s
-; This test used to crash (PR35469).
-
-define void @func1() {
-  %t = bitcast void ()* @func2 to void ()*
-  tail call void %t()
-  ret void
-}
-
-define void @func2() {
-  %t = bitcast void ()* @func3 to void ()*
-  tail call void %t()
-  ret void
-}
-
-define void @func3() {
-  %t = bitcast void ()* @func4 to void ()*
-  tail call void %t()
-  ret void
-}
-
-define void @func4() {
-  br i1 undef, label %left, label %right
-
-left:
-  %t = bitcast void ()* @func5 to void ()*
-  tail call void %t()
-  ret void
-
-right:
-  ret void
-}
-
-define void @func5() {
-  %t = bitcast void ()* @func6 to void ()*
-  tail call void %t()
-  ret void
-}
-
-define void @func6() {
-  %t = bitcast void ()* @func2 to void ()*
-  tail call void %t()
-  ret void
-}
-
-define void @func7() {
-  %t = bitcast void ()* @func3 to void ()*
-  tail call void @func8(void()* %t)
-  ret void
-}
-
-define void @func8(void()* %f) {
-  tail call void %f()
-  ret void
-}
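For context, the test deleted above pinned down PR35469: each function forwards control through an indirect call whose target the analyzer can see through the bitcast, and func2 -> func3 -> func4 -> func5 -> func6 -> func2 forms a cycle, so a cost analysis that recursively boosts every discovered indirect callee would never terminate. The BoostIndirect parameter removed by this patch broke that cycle by constructing the nested CallAnalyzer with the boost disabled. A toy model of the hazard, with an illustrative call graph and cost accounting rather than LLVM's real API:

#include <cstdio>
#include <map>
#include <string>

// Toy call graph mirroring the deleted test: each function makes one
// indirect call whose target is discoverable, and the chain loops back.
const std::map<std::string, std::string> Target = {
    {"func1", "func2"}, {"func2", "func3"}, {"func3", "func4"},
    {"func4", "func5"}, {"func5", "func6"}, {"func6", "func2"}};

// Sketch of a nested cost evaluation. Propagating Boost unchanged would
// chase the func2 cycle forever; passing false to the nested run (what the
// removed BoostIndirect flag did) bounds the recursion at one level.
int evaluate(const std::string &F, bool Boost) {
  int Cost = 1; // stand-in for per-call cost accounting
  auto It = Target.find(F);
  if (It != Target.end() && Boost)
    Cost += evaluate(It->second, /*Boost=*/false); // 'true' here would loop
  return Cost;
}

int main() {
  std::printf("total cost: %d\n", evaluate("func1", /*Boost=*/true));
  return 0;
}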