From 487ab869422b1eb7be932eb70445d9d0541fdce1 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Mon, 23 Oct 2017 08:05:14 +0000 Subject: [ARM] Allow unrolling of multi-block loops. Before, loop unrolling was only enabled for loops with a single block. This restriction has been removed and replaced by: - allow a maximum of two exiting blocks, - a four basic block limit for cores with a branch predictor. Differential Revision: https://reviews.llvm.org/D38952 llvm-svn: 316313 --- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 51 ++++++++++++++++++-------- 1 file changed, 35 insertions(+), 16 deletions(-) (limited to 'llvm/lib/Target') diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index 4395a319864..ca80d6f53f4 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -586,34 +586,53 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, if (!ST->isMClass()) return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP); - // Only enable on Thumb-2 targets for simple loops. - if (!ST->isThumb2() || L->getNumBlocks() != 1) - return; - // Disable loop unrolling for Oz and Os. UP.OptSizeThreshold = 0; UP.PartialOptSizeThreshold = 0; - BasicBlock *BB = L->getLoopLatch(); - if (BB->getParent()->optForSize()) + if (L->getHeader()->getParent()->optForSize()) + return; + + // Only enable on Thumb-2 targets. + if (!ST->isThumb2()) + return; + + SmallVector<BasicBlock*, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + DEBUG(dbgs() << "Loop has:\n" + << "Blocks: " << L->getNumBlocks() << "\n" + << "Exit blocks: " << ExitingBlocks.size() << "\n"); + + // Only allow another exit other than the latch. This acts as an early exit + // as it mirrors the profitability calculation of the runtime unroller. + if (ExitingBlocks.size() > 2) + return; + + // Limit the CFG of the loop body for targets with a branch predictor. 
+ // Allowing 4 blocks permits if-then-else diamonds in the body. + if (ST->hasBranchPredictor() && L->getNumBlocks() > 4) return; // Scan the loop: don't unroll loops with calls as this could prevent // inlining. unsigned Cost = 0; - for (auto &I : *BB) { - if (isa<CallInst>(I) || isa<InvokeInst>(I)) { - ImmutableCallSite CS(&I); - if (const Function *F = CS.getCalledFunction()) { - if (!isLoweredToCall(F)) - continue; + for (auto *BB : L->getBlocks()) { + for (auto &I : *BB) { + if (isa<CallInst>(I) || isa<InvokeInst>(I)) { + ImmutableCallSite CS(&I); + if (const Function *F = CS.getCalledFunction()) { + if (!isLoweredToCall(F)) + continue; + } + return; } - return; + SmallVector<const Value*, 4> Operands(I.value_op_begin(), + I.value_op_end()); + Cost += getUserCost(&I, Operands); } - SmallVector<const Value*, 4> Operands(I.value_op_begin(), - I.value_op_end()); - Cost += getUserCost(&I, Operands); } + DEBUG(dbgs() << "Cost of loop: " << Cost << "\n"); + UP.Partial = true; UP.Runtime = true; UP.UnrollRemainder = true; -- cgit v1.2.3