summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorIkhlas Ajbar <iajbar@codeaurora.org>2018-04-03 03:39:43 +0000
committerIkhlas Ajbar <iajbar@codeaurora.org>2018-04-03 03:39:43 +0000
commitb7322e8ac7a99b059dc7c3930436cebe72b53b8e (patch)
treeabfa9e7c3351221fbd99c7ebd8076a03e7fe870c /llvm
parent35a75cae92a0ed65966fa934fc087a7eb1f9ca26 (diff)
downloadbcm5719-llvm-b7322e8ac7a99b059dc7c3930436cebe72b53b8e.tar.gz
bcm5719-llvm-b7322e8ac7a99b059dc7c3930436cebe72b53b8e.zip
Peel loops with small runtime trip counts
For Hexagon, peeling loops with small runtime trip counts is beneficial for our benchmarks. We set PeelCount in HexagonTargetTransformInfo.cpp, and computePeelCount now uses the PeelCount set by the target (or by the -unroll-peel-count flag) as the starting value when computing the desired peel count. Differential Revision: https://reviews.llvm.org/D44880 llvm-svn: 329042
Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm/Transforms/Utils/UnrollLoop.h2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp9
-rw-r--r--llvm/test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll37
5 files changed, 56 insertions, 2 deletions
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 3983637a5a5..d96c65804d8 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -73,6 +73,8 @@ void computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
unsigned &TripCount, ScalarEvolution &SE);
+bool canPeel(Loop *L);
+
bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE,
DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA);
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 3290b5931ad..786f8eb4d8f 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -44,6 +44,13 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
UP.Runtime = UP.Partial = true;
+ // Only try to peel innermost loops with small runtime trip counts.
+ if (L && L->empty() &&
+ SE.getSmallConstantTripCount(L) == 0 &&
+ SE.getSmallConstantMaxTripCount(L) > 0 &&
+ SE.getSmallConstantMaxTripCount(L) <= 5) {
+ UP.PeelCount = 2;
+ }
}
bool HexagonTTIImpl::shouldFavorPostInc() const {
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index a1b25a22a14..ad454d77406 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -978,6 +978,9 @@ static LoopUnrollResult tryToUnrollLoop(
if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
return LoopUnrollResult::Unmodified;
+ if (UP.PeelCount && !canPeel(L))
+ UP.PeelCount = 0;
+
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
index 5f465f328b0..f47744d5593 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -69,7 +69,7 @@ static const unsigned InfiniteIterationsToInvariance =
std::numeric_limits<unsigned>::max();
// Check whether we are capable of peeling this loop.
-static bool canPeel(Loop *L) {
+bool llvm::canPeel(Loop *L) {
// Make sure the loop is in simplified form
if (!L->isLoopSimplifyForm())
return false;
@@ -221,6 +221,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::UnrollingPreferences &UP,
unsigned &TripCount, ScalarEvolution &SE) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
+ // Save the UP.PeelCount value set by the target in
+ // TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
+ unsigned TargetPeelCount = UP.PeelCount;
UP.PeelCount = 0;
if (!canPeel(L))
return;
@@ -240,7 +243,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
// Now go through all Phis to calculate their the number of iterations they
// need to become invariants.
- unsigned DesiredPeelCount = 0;
+ // Start the max computation with the UP.PeelCount value set by the target
+ // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count.
+ unsigned DesiredPeelCount = TargetPeelCount;
BasicBlock *BackEdge = L->getLoopLatch();
assert(BackEdge && "Loop is not in simplified form?");
for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
diff --git a/llvm/test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll b/llvm/test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll
new file mode 100644
index 00000000000..45c2553a70a
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll
@@ -0,0 +1,37 @@
+; RUN: opt -loop-unroll -mtriple=hexagon -S < %s | FileCheck %s
+; Check that the loop is peeled twice for Hexagon.
+; CHECK: while.body.peel
+; CHECK: while.body.peel2
+
+%struct.STREAM = type { %union.anon, i32, i32 }
+%union.anon = type { i32* }
+
+define void @function(%struct.STREAM* nocapture readonly %b) local_unnamed_addr {
+entry:
+ %bitPtr3 = getelementptr inbounds %struct.STREAM, %struct.STREAM* %b, i32 0, i32 2
+ %0 = load i32, i32* %bitPtr3, align 4
+ %cmp11 = icmp ult i32 %0, 32
+ br i1 %cmp11, label %while.body.preheader, label %do.end
+
+while.body.preheader:
+ %value2 = getelementptr inbounds %struct.STREAM, %struct.STREAM* %b, i32 0, i32 1
+ %1 = load i32, i32* %value2, align 4
+ %w = getelementptr inbounds %struct.STREAM, %struct.STREAM* %b, i32 0, i32 0, i32 0
+ %2 = load i32*, i32** %w, align 4
+ br label %while.body
+
+while.body:
+ %bitPtr.014 = phi i32 [ %add, %while.body ], [ %0, %while.body.preheader ]
+ %value.013 = phi i32 [ %shl, %while.body ], [ %1, %while.body.preheader ]
+ %ptr.012 = phi i32* [ %incdec.ptr, %while.body ], [ %2, %while.body.preheader ]
+ %add = add nuw i32 %bitPtr.014, 8
+ %shr = lshr i32 %value.013, 24
+ %incdec.ptr = getelementptr inbounds i32, i32* %ptr.012, i32 1
+ store i32 %shr, i32* %ptr.012, align 4
+ %shl = shl i32 %value.013, 8
+ %cmp = icmp ult i32 %add, 17
+ br i1 %cmp, label %while.body, label %do.end
+
+do.end:
+ ret void
+}
OpenPOWER on IntegriCloud