summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorDavid Green <david.green@arm.com>2019-03-07 13:44:40 +0000
committerDavid Green <david.green@arm.com>2019-03-07 13:44:40 +0000
commitffc922ec35f8991d5c97b89ae4fe18c15684b1d6 (patch)
tree6f441e4f2193ec551ae35c0f425bc4727fb2a947 /llvm/lib
parent5caba3069e8185b2c8225080c95755ea906b624e (diff)
downloadbcm5719-llvm-ffc922ec35f8991d5c97b89ae4fe18c15684b1d6.tar.gz
bcm5719-llvm-ffc922ec35f8991d5c97b89ae4fe18c15684b1d6.zip
[LSR] Attempt to increase the accuracy of LSR's setup cost
In some loops, we end up generating loop induction variables that look like: {(-1 * (zext i16 (%i0 * %i1) to i32))<nsw>,+,1} as opposed to the simpler: {(zext i16 (%i0 * %i1) to i32),+,-1} i.e. we count up from -limit to 0, not the simpler counting down from limit to 0. This is because the scores, as LSR calculates them, are the same and the second is filtered in place of the first. We end up with a redundant SUB from 0 in the code. This patch tries to make the calculation of the setup cost a little more thorough, recursing into the SCEV members to better approximate the setup required. The cost function for comparing LSR costs is: return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, C1.ScaleCost, C1.ImmCost, C1.SetupCost) < std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, C2.ScaleCost, C2.ImmCost, C2.SetupCost); So this will only alter results if none of the other variables turn out to be different. Differential Revision: https://reviews.llvm.org/D58770 llvm-svn: 355597
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp31
1 files changed, 25 insertions, 6 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index a1533019128..77af68ef119 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -115,6 +115,7 @@
#include <cstdlib>
#include <iterator>
#include <limits>
+#include <numeric>
#include <map>
#include <utility>
@@ -163,6 +164,10 @@ static cl::opt<unsigned> ComplexityLimit(
cl::init(std::numeric_limits<uint16_t>::max()),
cl::desc("LSR search space complexity limit"));
+static cl::opt<bool> EnableRecursiveSetupCost(
+ "lsr-recursive-setupcost", cl::Hidden, cl::init(true), // hidden option, initialised to true
+ cl::desc("Enable more thorough lsr setup cost calculation")); // when false, getSetupCost() returns 0 for anything but SCEVUnknown/SCEVConstant
+
#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
@@ -1211,6 +1216,25 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
bool HasBaseReg, int64_t Scale,
Instruction *Fixup = nullptr);
+/// Compute the setup-cost contribution of the register expression \p Reg.
+///
+/// A SCEVUnknown or SCEVConstant costs 1. If -lsr-recursive-setupcost is
+/// turned off, every other expression costs 0. Otherwise compound
+/// expressions are costed recursively: an add-recurrence by its start value,
+/// a cast by its operand, an n-ary expression by the sum over its operands,
+/// and a udiv by the sum of its two sides. Any remaining kind costs 0.
+///
+/// \returns the accumulated cost as an unsigned count.
+static unsigned getSetupCost(const SCEV *Reg) {
+  if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
+    return 1;
+  // Non-recursive mode: only the leaf kinds above are charged.
+  if (!EnableRecursiveSetupCost)
+    return 0;
+  // The add-recurrence test must stay ahead of the generic n-ary test so that
+  // only the start value is charged for a recurrence.
+  if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
+    return getSetupCost(S->getStart());
+  if (const auto *S = dyn_cast<SCEVCastExpr>(Reg))
+    return getSetupCost(S->getOperand());
+  // Seed with 0u so the accumulator has the same unsigned type as the
+  // per-operand costs; a plain 0 would make std::accumulate sum in int.
+  if (const auto *S = dyn_cast<SCEVNAryExpr>(Reg))
+    return std::accumulate(S->op_begin(), S->op_end(), 0u,
+                           [](unsigned Sum, const SCEV *Op) {
+                             return Sum + getSetupCost(Op);
+                           });
+  if (const auto *S = dyn_cast<SCEVUDivExpr>(Reg))
+    return getSetupCost(S->getLHS()) + getSetupCost(S->getRHS());
+  return 0;
+}
+
/// Tally up interesting quantities from the given register.
void Cost::RateRegister(const Formula &F, const SCEV *Reg,
SmallPtrSetImpl<const SCEV *> &Regs,
@@ -1276,12 +1300,7 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
// Rough heuristic; favor registers which don't require extra setup
// instructions in the preheader.
- if (!isa<SCEVUnknown>(Reg) &&
- !isa<SCEVConstant>(Reg) &&
- !(isa<SCEVAddRecExpr>(Reg) &&
- (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) ||
- isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
- ++C.SetupCost;
+ C.SetupCost += getSetupCost(Reg);
C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
SE.hasComputableLoopEvolution(Reg, L);
OpenPOWER on IntegriCloud