diff options
5 files changed, 56 insertions, 2 deletions
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 3983637a5a5..d96c65804d8 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -73,6 +73,8 @@ void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE); +bool canPeel(Loop *L); + bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index 3290b5931ad..786f8eb4d8f 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -44,6 +44,13 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { UP.Runtime = UP.Partial = true; + // Only try to peel innermost loops with small runtime trip counts. 
+ if (L && L->empty() && + SE.getSmallConstantTripCount(L) == 0 && + SE.getSmallConstantMaxTripCount(L) > 0 && + SE.getSmallConstantMaxTripCount(L) <= 5) { + UP.PeelCount = 2; + } } bool HexagonTTIImpl::shouldFavorPostInc() const { diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index a1b25a22a14..ad454d77406 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -978,6 +978,9 @@ static LoopUnrollResult tryToUnrollLoop( if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0)) return LoopUnrollResult::Unmodified; + if (UP.PeelCount && !canPeel(L)) + UP.PeelCount = 0; + SmallPtrSet<const Value *, 32> EphValues; CodeMetrics::collectEphemeralValues(L, &AC, EphValues); diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp index 5f465f328b0..f47744d5593 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -69,7 +69,7 @@ static const unsigned InfiniteIterationsToInvariance = std::numeric_limits<unsigned>::max(); // Check whether we are capable of peeling this loop. -static bool canPeel(Loop *L) { +bool llvm::canPeel(Loop *L) { // Make sure the loop is in simplified form if (!L->isLoopSimplifyForm()) return false; @@ -221,6 +221,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, unsigned &TripCount, ScalarEvolution &SE) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); + // Save the UP.PeelCount value set by the target in + // TTI.getUnrollingPreferences or by the flag -unroll-peel-count. 
+ unsigned TargetPeelCount = UP.PeelCount; UP.PeelCount = 0; if (!canPeel(L)) return; @@ -240,7 +243,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, SmallDenseMap<PHINode *, unsigned> IterationsToInvariance; // Now go through all Phis to calculate the number of iterations they // need to become invariants. - unsigned DesiredPeelCount = 0; + // Start the max computation with the UP.PeelCount value set by the target + // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count. + unsigned DesiredPeelCount = TargetPeelCount; BasicBlock *BackEdge = L->getLoopLatch(); assert(BackEdge && "Loop is not in simplified form?"); for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) { diff --git a/llvm/test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll b/llvm/test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll new file mode 100644 index 00000000000..45c2553a70a --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/Hexagon/peel-small-loop.ll @@ -0,0 +1,37 @@ +; RUN: opt -loop-unroll -mtriple=hexagon -S < %s | FileCheck %s +; Check that the loop is peeled twice for Hexagon. 
+; CHECK: while.body.peel +; CHECK: while.body.peel2 + +%struct.STREAM = type { %union.anon, i32, i32 } +%union.anon = type { i32* } + +define void @function(%struct.STREAM* nocapture readonly %b) local_unnamed_addr { +entry: + %bitPtr3 = getelementptr inbounds %struct.STREAM, %struct.STREAM* %b, i32 0, i32 2 + %0 = load i32, i32* %bitPtr3, align 4 + %cmp11 = icmp ult i32 %0, 32 + br i1 %cmp11, label %while.body.preheader, label %do.end + +while.body.preheader: + %value2 = getelementptr inbounds %struct.STREAM, %struct.STREAM* %b, i32 0, i32 1 + %1 = load i32, i32* %value2, align 4 + %w = getelementptr inbounds %struct.STREAM, %struct.STREAM* %b, i32 0, i32 0, i32 0 + %2 = load i32*, i32** %w, align 4 + br label %while.body + +while.body: + %bitPtr.014 = phi i32 [ %add, %while.body ], [ %0, %while.body.preheader ] + %value.013 = phi i32 [ %shl, %while.body ], [ %1, %while.body.preheader ] + %ptr.012 = phi i32* [ %incdec.ptr, %while.body ], [ %2, %while.body.preheader ] + %add = add nuw i32 %bitPtr.014, 8 + %shr = lshr i32 %value.013, 24 + %incdec.ptr = getelementptr inbounds i32, i32* %ptr.012, i32 1 + store i32 %shr, i32* %ptr.012, align 4 + %shl = shl i32 %value.013, 8 + %cmp = icmp ult i32 %add, 17 + br i1 %cmp, label %while.body, label %do.end + +do.end: + ret void +} |

