summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp25
1 files changed, 14 insertions, 11 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 0b11c156dcf..768860140e6 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -164,9 +164,9 @@ static cl::opt<unsigned> ComplexityLimit(
cl::init(std::numeric_limits<uint16_t>::max()),
cl::desc("LSR search space complexity limit"));
-static cl::opt<bool> EnableRecursiveSetupCost(
- "lsr-recursive-setupcost", cl::Hidden, cl::init(true),
- cl::desc("Enable more thorough lsr setup cost calculation"));
+static cl::opt<unsigned> SetupCostDepthLimit(
+ "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
+ cl::desc("The limit on recursion depth for LSRs setup cost"));
#ifndef NDEBUG
// Stress test IV chain generation.
@@ -1212,22 +1212,23 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
bool HasBaseReg, int64_t Scale,
Instruction *Fixup = nullptr);
-static unsigned getSetupCost(const SCEV *Reg) {
+static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
return 1;
- if (!EnableRecursiveSetupCost)
+ if (Depth == 0)
return 0;
if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
- return getSetupCost(S->getStart());
+ return getSetupCost(S->getStart(), Depth - 1);
if (auto S = dyn_cast<SCEVCastExpr>(Reg))
- return getSetupCost(S->getOperand());
+ return getSetupCost(S->getOperand(), Depth - 1);
if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
return std::accumulate(S->op_begin(), S->op_end(), 0,
- [](unsigned i, const SCEV *Reg) {
- return i + getSetupCost(Reg);
+ [&](unsigned i, const SCEV *Reg) {
+ return i + getSetupCost(Reg, Depth - 1);
});
if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
- return getSetupCost(S->getLHS()) + getSetupCost(S->getRHS());
+ return getSetupCost(S->getLHS(), Depth - 1) +
+ getSetupCost(S->getRHS(), Depth - 1);
return 0;
}
@@ -1293,7 +1294,9 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
// Rough heuristic; favor registers which don't require extra setup
// instructions in the preheader.
- C.SetupCost += getSetupCost(Reg);
+ C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);
+ // Ensure we don't, even with the recursion limit, produce invalid costs.
+ C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
SE->hasComputableLoopEvolution(Reg, L);
OpenPOWER on IntegriCloud