diff options
author | David Green <david.green@arm.com> | 2019-04-23 08:52:21 +0000 |
---|---|---|
committer | David Green <david.green@arm.com> | 2019-04-23 08:52:21 +0000 |
commit | 63a2aa715ad00e52ed1a7695f5a40d984fdaec24 (patch) | |
tree | 4497f8cf258ee2dcc3fb7e7b78b22562b246bd96 /llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | |
parent | ed4f48d290e342657798437538c8aa8e1ef35c72 (diff) | |
download | bcm5719-llvm-63a2aa715ad00e52ed1a7695f5a40d984fdaec24.tar.gz bcm5719-llvm-63a2aa715ad00e52ed1a7695f5a40d984fdaec24.zip |
[LSR] Limit the recursion for setup cost
In some circumstances we can end up with setup costs that are very complex to
compute, even though the scevs are not very complex to create. This can also
lead to setupcosts that are calculated to be exactly -1, which LSR treats as an
invalid cost. This patch puts a limit on the recursion depth for setup cost to
prevent them taking too long.
Thanks to @reames for the report and test case.
Differential Revision: https://reviews.llvm.org/D60944
llvm-svn: 358958
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 25 |
1 files changed, 14 insertions, 11 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 0b11c156dcf..768860140e6 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -164,9 +164,9 @@ static cl::opt<unsigned> ComplexityLimit( cl::init(std::numeric_limits<uint16_t>::max()), cl::desc("LSR search space complexity limit")); -static cl::opt<bool> EnableRecursiveSetupCost( - "lsr-recursive-setupcost", cl::Hidden, cl::init(true), - cl::desc("Enable more thorough lsr setup cost calculation")); +static cl::opt<unsigned> SetupCostDepthLimit( + "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7), + cl::desc("The limit on recursion depth for LSRs setup cost")); #ifndef NDEBUG // Stress test IV chain generation. @@ -1212,22 +1212,23 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, bool HasBaseReg, int64_t Scale, Instruction *Fixup = nullptr); -static unsigned getSetupCost(const SCEV *Reg) { +static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) { if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg)) return 1; - if (!EnableRecursiveSetupCost) + if (Depth == 0) return 0; if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg)) - return getSetupCost(S->getStart()); + return getSetupCost(S->getStart(), Depth - 1); if (auto S = dyn_cast<SCEVCastExpr>(Reg)) - return getSetupCost(S->getOperand()); + return getSetupCost(S->getOperand(), Depth - 1); if (auto S = dyn_cast<SCEVNAryExpr>(Reg)) return std::accumulate(S->op_begin(), S->op_end(), 0, - [](unsigned i, const SCEV *Reg) { - return i + getSetupCost(Reg); + [&](unsigned i, const SCEV *Reg) { + return i + getSetupCost(Reg, Depth - 1); }); if (auto S = dyn_cast<SCEVUDivExpr>(Reg)) - return getSetupCost(S->getLHS()) + getSetupCost(S->getRHS()); + return getSetupCost(S->getLHS(), Depth - 1) + + getSetupCost(S->getRHS(), Depth - 1); return 0; } @@ -1293,7 +1294,9 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg, // Rough heuristic; favor registers which don't require extra setup // instructions in the preheader. - C.SetupCost += getSetupCost(Reg); + C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit); + // Ensure we don't, even with the recusion limit, produce invalid costs. + C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16); C.NumIVMuls += isa<SCEVMulExpr>(Reg) && SE->hasComputableLoopEvolution(Reg, L); |