summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Sam Parker <sam.parker@arm.com> 2017-10-23 08:05:14 +0000
committer: Sam Parker <sam.parker@arm.com> 2017-10-23 08:05:14 +0000
commit: 487ab869422b1eb7be932eb70445d9d0541fdce1 (patch)
tree: 0693ef28145a9fda9451893d28f9fc81d5a32b43 /llvm/lib/Target
parent: 01e3bf8afd7ff515d44e5bcdc88e360af5da324a (diff)
download: bcm5719-llvm-487ab869422b1eb7be932eb70445d9d0541fdce1.tar.gz
bcm5719-llvm-487ab869422b1eb7be932eb70445d9d0541fdce1.zip
[ARM] Allow unrolling of multi-block loops.
Before, loop unrolling was only enabled for loops with a single block.
This restriction has been removed and replaced by:
- allow a maximum of two exiting blocks,
- a four basic block limit for cores with a branch predictor.

Differential Revision: https://reviews.llvm.org/D38952

llvm-svn: 316313
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp 51
1 file changed, 35 insertions, 16 deletions
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 4395a319864..ca80d6f53f4 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -586,34 +586,53 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
if (!ST->isMClass())
return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
- // Only enable on Thumb-2 targets for simple loops.
- if (!ST->isThumb2() || L->getNumBlocks() != 1)
- return;
-
// Disable loop unrolling for Oz and Os.
UP.OptSizeThreshold = 0;
UP.PartialOptSizeThreshold = 0;
- BasicBlock *BB = L->getLoopLatch();
- if (BB->getParent()->optForSize())
+ if (L->getHeader()->getParent()->optForSize())
+ return;
+
+ // Only enable on Thumb-2 targets.
+ if (!ST->isThumb2())
+ return;
+
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ DEBUG(dbgs() << "Loop has:\n"
+ << "Blocks: " << L->getNumBlocks() << "\n"
+ << "Exit blocks: " << ExitingBlocks.size() << "\n");
+
+ // Only allow another exit other than the latch. This acts as an early exit
+ // as it mirrors the profitability calculation of the runtime unroller.
+ if (ExitingBlocks.size() > 2)
+ return;
+
+ // Limit the CFG of the loop body for targets with a branch predictor.
+ // Allowing 4 blocks permits if-then-else diamonds in the body.
+ if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
return;
// Scan the loop: don't unroll loops with calls as this could prevent
// inlining.
unsigned Cost = 0;
- for (auto &I : *BB) {
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- ImmutableCallSite CS(&I);
- if (const Function *F = CS.getCalledFunction()) {
- if (!isLoweredToCall(F))
- continue;
+ for (auto *BB : L->getBlocks()) {
+ for (auto &I : *BB) {
+ if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+ ImmutableCallSite CS(&I);
+ if (const Function *F = CS.getCalledFunction()) {
+ if (!isLoweredToCall(F))
+ continue;
+ }
+ return;
}
- return;
+ SmallVector<const Value*, 4> Operands(I.value_op_begin(),
+ I.value_op_end());
+ Cost += getUserCost(&I, Operands);
}
- SmallVector<const Value*, 4> Operands(I.value_op_begin(),
- I.value_op_end());
- Cost += getUserCost(&I, Operands);
}
+ DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
+
UP.Partial = true;
UP.Runtime = true;
UP.UnrollRemainder = true;
OpenPOWER on IntegriCloud