summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Sam Parker <sam.parker@arm.com> 2017-08-16 07:42:44 +0000
committer: Sam Parker <sam.parker@arm.com> 2017-08-16 07:42:44 +0000
commit: 84fd0c3bf27c5a950025a2f4b8fdd80c3330679a (patch)
tree: 03b3a703c26594c712f896ea99288bc23571e7e7 /llvm/lib/Target
parent: ce5ea3813593753c8a96ea28363ac1abddea1023 (diff)
download: bcm5719-llvm-84fd0c3bf27c5a950025a2f4b8fdd80c3330679a.tar.gz
          bcm5719-llvm-84fd0c3bf27c5a950025a2f4b8fdd80c3330679a.zip
[ARM] Improve loop unrolling for Cortex-M
- Set the default runtime unroll count to 4 and use the newly added UnrollRemainder option.
- Compute a loop cost and force unrolling when the cost is less than 12.
- Disable unrolling on Thumb1-only targets.

Differential Revision: https://reviews.llvm.org/D36134
llvm-svn: 310997
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 25
1 files changed, 19 insertions, 6 deletions
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index dc222bddeab..2066a0e5b72 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -566,16 +566,23 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
// Only currently enable these preferences for M-Class cores.
- if (!ST->isMClass() || L->getNumBlocks() != 1)
+ if (!ST->isMClass())
return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
+ // Only enable on Thumb-2 targets for simple loops.
+ if (!ST->isThumb2() || L->getNumBlocks() != 1)
+ return;
+
// Disable loop unrolling for Oz and Os.
UP.OptSizeThreshold = 0;
UP.PartialOptSizeThreshold = 0;
+ BasicBlock *BB = L->getLoopLatch();
+ if (BB->getParent()->optForSize())
+ return;
// Scan the loop: don't unroll loops with calls as this could prevent
// inlining.
- BasicBlock *BB = L->getLoopLatch();
+ unsigned Cost = 0;
for (auto &I : *BB) {
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
ImmutableCallSite CS(&I);
@@ -585,12 +592,18 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
return;
}
+ SmallVector<const Value*, 4> Operands(I.value_op_begin(),
+ I.value_op_end());
+ Cost += getUserCost(&I, Operands);
}
- // Enable partial and runtime unrolling, set the initial threshold based upon
- // the number of registers available.
UP.Partial = true;
UP.Runtime = true;
- UP.Threshold = ST->isThumb1Only() ? 75 : 150;
- UP.PartialThreshold = ST->isThumb1Only() ? 75 : 150;
+ UP.UnrollRemainder = true;
+ UP.DefaultUnrollRuntimeCount = 4;
+
+ // Force unrolling small loops can be very useful because of the branch
+ // taken cost of the backedge.
+ if (Cost < 12)
+ UP.Force = true;
}
OpenPOWER on IntegriCloud