diff options
| author | David Green <david.green@arm.com> | 2018-08-11 07:37:31 +0000 |
|---|---|---|
| committer | David Green <david.green@arm.com> | 2018-08-11 07:37:31 +0000 |
| commit | f7111d1ecef12d766e08bd36740c10ebba02c6ec (patch) | |
| tree | e1ec4894d2ee205247abacf2fa1798226b922e65 /llvm/lib/Transforms | |
| parent | 395b80cd3cf41da895d0bf217dfc2a0918593a13 (diff) | |
| download | bcm5719-llvm-f7111d1ecef12d766e08bd36740c10ebba02c6ec.tar.gz bcm5719-llvm-f7111d1ecef12d766e08bd36740c10ebba02c6ec.zip | |
[UnJ] Improve explicit loop count checks
Try to improve the computed counts when they have been explicitly set by a pragma
or command line option. This moves the code around, so that we first call
computeUnrollCount to get a sensible count and then override that if explicit unroll
and jam counts are specified.
Also added some extra debug messages for when unroll and jamming is disabled.
Differential Revision: https://reviews.llvm.org/D50075
llvm-svn: 339501
Diffstat (limited to 'llvm/lib/Transforms')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp | 119 |
1 files changed, 67 insertions, 52 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp index 86c99aed441..30dfb9b5dd2 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp @@ -149,7 +149,26 @@ static bool computeUnrollAndJamCount( OptimizationRemarkEmitter *ORE, unsigned OuterTripCount, unsigned OuterTripMultiple, unsigned OuterLoopSize, unsigned InnerTripCount, unsigned InnerLoopSize, TargetTransformInfo::UnrollingPreferences &UP) { - // Check for explicit Count from the "unroll-and-jam-count" option. + // First up use computeUnrollCount from the loop unroller to get a count + // for unrolling the outer loop, plus any loops requiring explicit + // unrolling we leave to the unroller. This uses UP.Threshold / + // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values. + // We have already checked that the loop has no unroll.* pragmas. + unsigned MaxTripCount = 0; + bool UseUpperBound = false; + bool ExplicitUnroll = computeUnrollCount( + L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount, + OuterTripMultiple, OuterLoopSize, UP, UseUpperBound); + if (ExplicitUnroll || UseUpperBound) { + // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it + // for the unroller instead. + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; explicit count set by " + "computeUnrollCount\n"); + UP.Count = 0; + return false; + } + + // Override with any explicit Count from the "unroll-and-jam-count" option. bool UserUnrollCount = UnrollAndJamCount.getNumOccurrences() > 0; if (UserUnrollCount) { UP.Count = UnrollAndJamCount; @@ -174,80 +193,76 @@ static bool computeUnrollAndJamCount( return true; } - // Use computeUnrollCount from the loop unroller to get a sensible count - // for the unrolling the outer loop. This uses UP.Threshold / - // UP.PartialThreshold / UP.MaxCount to come up with sensible loop values. 
- // We have already checked that the loop has no unroll.* pragmas. - unsigned MaxTripCount = 0; - bool UseUpperBound = false; - bool ExplicitUnroll = computeUnrollCount( - L, TTI, DT, LI, SE, EphValues, ORE, OuterTripCount, MaxTripCount, - OuterTripMultiple, OuterLoopSize, UP, UseUpperBound); - if (ExplicitUnroll || UseUpperBound) { - // If the user explicitly set the loop as unrolled, dont UnJ it. Leave it - // for the unroller instead. - UP.Count = 0; - return false; - } - bool PragmaEnableUnroll = HasUnrollAndJamEnablePragma(L); - ExplicitUnroll = PragmaCount > 0 || PragmaEnableUnroll || UserUnrollCount; + bool ExplicitUnrollAndJamCount = PragmaCount > 0 || UserUnrollCount; + bool ExplicitUnrollAndJam = PragmaEnableUnroll || ExplicitUnrollAndJamCount; // If the loop has an unrolling pragma, we want to be more aggressive with // unrolling limits. - if (ExplicitUnroll && OuterTripCount != 0) + if (ExplicitUnrollAndJam) UP.UnrollAndJamInnerLoopThreshold = PragmaUnrollAndJamThreshold; if (!UP.AllowRemainder && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >= UP.UnrollAndJamInnerLoopThreshold) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; can't create remainder and " + "inner loop too large\n"); UP.Count = 0; return false; } + // We have a sensible limit for the outer loop, now adjust it for the inner + // loop and UP.UnrollAndJamInnerLoopThreshold. If the outer limit was set + // explicitly, we want to stick to it. + if (!ExplicitUnrollAndJamCount && UP.AllowRemainder) { + while (UP.Count != 0 && getUnrollAndJammedLoopSize(InnerLoopSize, UP) >= + UP.UnrollAndJamInnerLoopThreshold) + UP.Count--; + } + + // If we are explicitly unroll and jamming, we are done. Otherwise there are a + // number of extra performance heuristics to check. 
+ if (ExplicitUnrollAndJam) + return true; + // If the inner loop count is known and small, leave the entire loop nest to // be the unroller - if (!ExplicitUnroll && InnerTripCount && - InnerLoopSize * InnerTripCount < UP.Threshold) { + if (InnerTripCount && InnerLoopSize * InnerTripCount < UP.Threshold) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; small inner loop count is " + "being left for the unroller\n"); UP.Count = 0; return false; } - // We have a sensible limit for the outer loop, now adjust it for the inner - // loop and UP.UnrollAndJamInnerLoopThreshold. - while (UP.Count != 0 && UP.AllowRemainder && - getUnrollAndJammedLoopSize(InnerLoopSize, UP) >= - UP.UnrollAndJamInnerLoopThreshold) - UP.Count--; - - if (!ExplicitUnroll) { - // Check for situations where UnJ is likely to be unprofitable. Including - // subloops with more than 1 block. - if (SubLoop->getBlocks().size() != 1) { - UP.Count = 0; - return false; - } + // Check for situations where UnJ is likely to be unprofitable. Including + // subloops with more than 1 block. + if (SubLoop->getBlocks().size() != 1) { + LLVM_DEBUG( + dbgs() << "Won't unroll-and-jam; More than one inner loop block\n"); + UP.Count = 0; + return false; + } - // Limit to loops where there is something to gain from unrolling and - // jamming the loop. In this case, look for loads that are invariant in the - // outer loop and can become shared. - unsigned NumInvariant = 0; - for (BasicBlock *BB : SubLoop->getBlocks()) { - for (Instruction &I : *BB) { - if (auto *Ld = dyn_cast<LoadInst>(&I)) { - Value *V = Ld->getPointerOperand(); - const SCEV *LSCEV = SE.getSCEVAtScope(V, L); - if (SE.isLoopInvariant(LSCEV, L)) - NumInvariant++; - } + // Limit to loops where there is something to gain from unrolling and + // jamming the loop. In this case, look for loads that are invariant in the + // outer loop and can become shared. 
+ unsigned NumInvariant = 0; + for (BasicBlock *BB : SubLoop->getBlocks()) { + for (Instruction &I : *BB) { + if (auto *Ld = dyn_cast<LoadInst>(&I)) { + Value *V = Ld->getPointerOperand(); + const SCEV *LSCEV = SE.getSCEVAtScope(V, L); + if (SE.isLoopInvariant(LSCEV, L)) + NumInvariant++; } } - if (NumInvariant == 0) { - UP.Count = 0; - return false; - } + } + if (NumInvariant == 0) { + LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; No loop invariant loads\n"); + UP.Count = 0; + return false; } - return ExplicitUnroll; + return false; } static LoopUnrollResult |

