diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 217 |
1 files changed, 111 insertions, 106 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 7bd1fcffbd5..3d2ae3a5b61 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -414,41 +414,39 @@ namespace { // v = b[0]* 0 + b[1]* 1 + b[2]* 0 // And finally: // v = b[1] -class UnrollAnalyzer : public InstVisitor<UnrollAnalyzer, bool> { - typedef InstVisitor<UnrollAnalyzer, bool> Base; - friend class InstVisitor<UnrollAnalyzer, bool>; +class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> { + typedef InstVisitor<UnrolledInstAnalyzer, bool> Base; + friend class InstVisitor<UnrolledInstAnalyzer, bool>; - /// \brief The loop we're going to analyze. - const Loop *L; +public: + UnrolledInstAnalyzer(unsigned Iteration, + DenseMap<Value *, Constant *> &SimplifiedValues, + SmallDenseMap<Value *, SCEVGEPDescriptor> &SCEVGEPCache) + : Iteration(Iteration), SimplifiedValues(SimplifiedValues), + SCEVGEPCache(SCEVGEPCache) {} - /// \brief TripCount of the given loop. - unsigned TripCount; + // Allow access to the initial visit method. + using Base::visit; - ScalarEvolution &SE; - - const TargetTransformInfo &TTI; +private: + /// \brief Number of currently simulated iteration. + /// + /// If an expression is ConstAddress+Constant, then the Constant is + /// Start + Iteration*Step, where Start and Step could be obtained from + /// SCEVGEPCache. + unsigned Iteration; // While we walk the loop instructions, we we build up and maintain a mapping // of simplified values specific to this iteration. The idea is to propagate // any special information we have about loads that can be replaced with // constants after complete unrolling, and account for likely simplifications // post-unrolling. - DenseMap<Value *, Constant *> SimplifiedValues; + DenseMap<Value *, Constant *> &SimplifiedValues; // To avoid requesting SCEV info on every iteration, request it once, and // for each value that would become ConstAddress+Constant after loop // unrolling, save the corresponding data. - SmallDenseMap<Value *, SCEVGEPDescriptor> SCEVGEPCache; - - /// \brief Number of currently simulated iteration. - /// - /// If an expression is ConstAddress+Constant, then the Constant is - /// Start + Iteration*Step, where Start and Step could be obtained from - /// SCEVGEPCache. - unsigned Iteration; - - /// \brief Upper threshold for complete unrolling. - unsigned MaxUnrolledLoopSize; + SmallDenseMap<Value *, SCEVGEPDescriptor> &SCEVGEPCache; /// Base case for the instruction visitor. bool visitInstruction(Instruction &I) { return false; }; @@ -524,95 +522,102 @@ class UnrollAnalyzer : public InstVisitor<UnrollAnalyzer, bool> { return true; } +}; +} // namespace -public: - UnrollAnalyzer(const Loop *L, unsigned TripCount, ScalarEvolution &SE, - const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) - : L(L), TripCount(TripCount), SE(SE), TTI(TTI), - MaxUnrolledLoopSize(MaxUnrolledLoopSize), - NumberOfOptimizedInstructions(0), UnrolledLoopSize(0) {} +namespace { +struct EstimatedUnrollCost { /// \brief Count the number of optimized instructions. unsigned NumberOfOptimizedInstructions; /// \brief Count the total number of instructions. unsigned UnrolledLoopSize; +}; +} - /// \brief Figure out if the loop is worth full unrolling. - /// - /// Complete loop unrolling can make some loads constant, and we need to know - /// if that would expose any further optimization opportunities. This routine - /// estimates this optimization. It assigns computed number of instructions, - /// that potentially might be optimized away, to - /// NumberOfOptimizedInstructions, and total number of instructions to - /// UnrolledLoopSize (not counting blocks that won't be reached, if we were - /// able to compute the condition). - /// \returns false if we can't analyze the loop, or if we discovered that - /// unrolling won't give anything. Otherwise, returns true. - bool analyzeLoop() { - SmallSetVector<BasicBlock *, 16> BBWorklist; - - // We want to be able to scale offsets by the trip count and add more - // offsets to them without checking for overflows, and we already don't want - // to analyze *massive* trip counts, so we force the max to be reasonably - // small. - assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) && - "The unroll iterations max is too large!"); - - // Don't simulate loops with a big or unknown tripcount - if (!UnrollMaxIterationsCountToAnalyze || !TripCount || - TripCount > UnrollMaxIterationsCountToAnalyze) - return false; - - // To avoid compute SCEV-expressions on every iteration, compute them once - // and store interesting to us in SCEVGEPCache. - SCEVGEPCache = buildSCEVGEPCache(*L, SE); - - // Simulate execution of each iteration of the loop counting instructions, - // which would be simplified. - // Since the same load will take different values on different iterations, - // we literally have to go through all loop's iterations. - for (Iteration = 0; Iteration < TripCount; ++Iteration) { - SimplifiedValues.clear(); - BBWorklist.clear(); - BBWorklist.insert(L->getHeader()); - // Note that we *must not* cache the size, this loop grows the worklist. - for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { - BasicBlock *BB = BBWorklist[Idx]; - - // Visit all instructions in the given basic block and try to simplify - // it. We don't change the actual IR, just count optimization - // opportunities. - for (Instruction &I : *BB) { - UnrolledLoopSize += TTI.getUserCost(&I); - - // Visit the instruction to analyze its loop cost after unrolling, - // and if the visitor returns true, then we can optimize this - // instruction away. - if (Base::visit(I)) - NumberOfOptimizedInstructions += TTI.getUserCost(&I); - - // If unrolled body turns out to be too big, bail out. - if (UnrolledLoopSize - NumberOfOptimizedInstructions > - MaxUnrolledLoopSize) - return false; - } +/// \brief Figure out if the loop is worth full unrolling. +/// +/// Complete loop unrolling can make some loads constant, and we need to know +/// if that would expose any further optimization opportunities. This routine +/// estimates this optimization. It assigns computed number of instructions, +/// that potentially might be optimized away, to +/// NumberOfOptimizedInstructions, and total number of instructions to +/// UnrolledLoopSize (not counting blocks that won't be reached, if we were +/// able to compute the condition). +/// \returns false if we can't analyze the loop, or if we discovered that +/// unrolling won't give anything. Otherwise, returns true. +Optional<EstimatedUnrollCost> +analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, + const TargetTransformInfo &TTI, + unsigned MaxUnrolledLoopSize) { + // We want to be able to scale offsets by the trip count and add more offsets + // to them without checking for overflows, and we already don't want to + // analyze *massive* trip counts, so we force the max to be reasonably small. + assert(UnrollMaxIterationsCountToAnalyze < (INT_MAX / 2) && + "The unroll iterations max is too large!"); + + // Don't simulate loops with a big or unknown tripcount + if (!UnrollMaxIterationsCountToAnalyze || !TripCount || + TripCount > UnrollMaxIterationsCountToAnalyze) + return None; + + // To avoid compute SCEV-expressions on every iteration, compute them once + // and store interesting to us in SCEVGEPCache. + SmallDenseMap<Value *, SCEVGEPDescriptor> SCEVGEPCache = + buildSCEVGEPCache(*L, SE); + + SmallSetVector<BasicBlock *, 16> BBWorklist; + DenseMap<Value *, Constant *> SimplifiedValues; - // Add BB's successors to the worklist. - for (BasicBlock *Succ : successors(BB)) - if (L->contains(Succ)) - BBWorklist.insert(Succ); + unsigned NumberOfOptimizedInstructions = 0; + unsigned UnrolledLoopSize = 0; + + // Simulate execution of each iteration of the loop counting instructions, + // which would be simplified. + // Since the same load will take different values on different iterations, + // we literally have to go through all loop's iterations. + for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { + SimplifiedValues.clear(); + UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SCEVGEPCache); + + BBWorklist.clear(); + BBWorklist.insert(L->getHeader()); + // Note that we *must not* cache the size, this loop grows the worklist. + for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { + BasicBlock *BB = BBWorklist[Idx]; + + // Visit all instructions in the given basic block and try to simplify + // it. We don't change the actual IR, just count optimization + // opportunities. + for (Instruction &I : *BB) { + UnrolledLoopSize += TTI.getUserCost(&I); + + // Visit the instruction to analyze its loop cost after unrolling, + // and if the visitor returns true, then we can optimize this + // instruction away. + if (Analyzer.visit(I)) + NumberOfOptimizedInstructions += TTI.getUserCost(&I); + + // If unrolled body turns out to be too big, bail out. + if (UnrolledLoopSize - NumberOfOptimizedInstructions > + MaxUnrolledLoopSize) + return None; } - // If we found no optimization opportunities on the first iteration, we - // won't find them on later ones too. - if (!NumberOfOptimizedInstructions) - return false; + // Add BB's successors to the worklist. + for (BasicBlock *Succ : successors(BB)) + if (L->contains(Succ)) + BBWorklist.insert(Succ); } - return true; + + // If we found no optimization opportunities on the first iteration, we + // won't find them on later ones too. + if (!NumberOfOptimizedInstructions) + return None; } -}; -} // namespace + return {{NumberOfOptimizedInstructions, UnrolledLoopSize}}; +} /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, @@ -886,14 +891,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // The loop isn't that small, but we still can fully unroll it if that // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. - UnrollAnalyzer UA(L, TripCount, *SE, TTI, AbsoluteThreshold); - if (UA.analyzeLoop() && - canUnrollCompletely(L, Threshold, AbsoluteThreshold, - UA.UnrolledLoopSize, - UA.NumberOfOptimizedInstructions, - PercentOfOptimizedForCompleteUnroll)) { - Unrolling = Full; - } + if (Optional<EstimatedUnrollCost> Cost = + analyzeLoopUnrollCost(L, TripCount, *SE, TTI, AbsoluteThreshold)) + if (canUnrollCompletely(L, Threshold, AbsoluteThreshold, + Cost->UnrolledLoopSize, + Cost->NumberOfOptimizedInstructions, + PercentOfOptimizedForCompleteUnroll)) { + Unrolling = Full; + } } } else if (TripCount && Count < TripCount) { Unrolling = Partial; |

