diff options
Diffstat (limited to 'llvm/lib/CodeGen/MachineOutliner.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachineOutliner.cpp | 193 |
1 files changed, 106 insertions, 87 deletions
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index c12bf52c0e2..936a8106224 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -231,14 +231,18 @@ struct SuffixTreeNode { /// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf class SuffixTree { public: - /// Stores each leaf node in the tree. - /// - /// This is used for finding outlining candidates. - std::vector<SuffixTreeNode *> LeafVector; - /// Each element is an integer representing an instruction in the module. ArrayRef<unsigned> Str; + /// A repeated substring in the tree. + struct RepeatedSubstring { + /// The length of the string. + unsigned Length; + + /// The start indices of each occurrence. + std::vector<unsigned> StartIndices; + }; + private: /// Maintains each node in the tree. SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator; @@ -322,8 +326,7 @@ private: } /// Set the suffix indices of the leaves to the start indices of their - /// respective suffixes. Also stores each leaf in \p LeafVector at its - /// respective suffix index. + /// respective suffixes. /// /// \param[in] CurrNode The node currently being visited. /// \param CurrIdx The current index of the string being visited. @@ -353,9 +356,6 @@ private: CurrNode.SuffixIdx = Str.size() - CurrIdx; assert(CurrNode.Parent && "CurrNode had no parent!"); CurrNode.Parent->OccurrenceCount++; - - // Store the leaf in the leaf vector for pruning later. - LeafVector[CurrNode.SuffixIdx] = &CurrNode; } } @@ -489,6 +489,44 @@ private: return SuffixesToAdd; } + /// Helper function for findRepeatedSubstrings. + /// Traverses the suffix tree that finds all nodes associated with a repeated + /// substring. That is, all internal non-root nodes. If the given node has + /// more than one leaf child, store the repeated strings in Substrings. + void + findRepeatedSubstringsHelper(SuffixTreeNode &Curr, + std::vector<RepeatedSubstring> &Substrings, + const unsigned MinLength = 1) { + assert(!Curr.isLeaf() && "Visited a leaf?"); + std::vector<SuffixTreeNode *> LeafChildren; + unsigned Length = Curr.ConcatLen; + + for (auto &ChildPair : Curr.Children) { + if (!ChildPair.second->isLeaf()) + findRepeatedSubstringsHelper(*ChildPair.second, Substrings, MinLength); + else if (Length >= MinLength) + LeafChildren.push_back(ChildPair.second); + } + + // The root node never has repeats. Quit here. + if (Curr.isRoot()) + return; + + // If there are no occurrences of the minimum length, then quit. + if (LeafChildren.empty() || LeafChildren.size() < 2) + return; + + // We have a node associated with a repeated substring. Store that in + // Substrings and move on. + RepeatedSubstring RS; + RS.Length = Length; + + // Each occurrence starts at a suffix given by a leaf child. + for (SuffixTreeNode *Leaf : LeafChildren) + RS.StartIndices.push_back(Leaf->SuffixIdx); + Substrings.push_back(RS); +} + public: /// Construct a suffix tree from a sequence of unsigned integers. /// @@ -497,7 +535,6 @@ public: Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0); Root->IsInTree = true; Active.Node = Root; - LeafVector = std::vector<SuffixTreeNode *>(Str.size()); // Keep track of the number of suffixes we have to add of the current // prefix. @@ -518,6 +555,15 @@ public: assert(Root && "Root node can't be nullptr!"); setSuffixIndices(*Root, 0); } + + /// Finds all repeated substrings with an optionally-provided minimum length + /// and stores them in \p Substrings. + /// If \p MinLength is provided, only return those with a given minimum + /// length. + void findRepeatedSubstrings(std::vector<RepeatedSubstring> &Substrings, + const unsigned MinLength = 1) { + findRepeatedSubstringsHelper(*Root, Substrings, MinLength); + } }; /// Maps \p MachineInstrs to unsigned integers and stores the mappings. @@ -925,80 +971,55 @@ unsigned MachineOutliner::findCandidates( FunctionList.clear(); unsigned MaxLen = 0; - // FIXME: Visit internal nodes instead of leaves. - for (SuffixTreeNode *Leaf : ST.LeafVector) { - assert(Leaf && "Leaves in LeafVector cannot be null!"); - if (!Leaf->IsInTree) - continue; - - assert(Leaf->Parent && "All leaves must have parents!"); - SuffixTreeNode &Parent = *(Leaf->Parent); - - // If it doesn't appear enough, or we already outlined from it, skip it. - if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree) - continue; + // First, find dall of the repeated substrings in the tree of minimum length + // 2. + // FIXME: 2 is an approximation which isn't necessarily true for, say, X86. + // If we factor in instruction lengths, we need more information than this. + // FIXME: It'd be nice if we could just have a repeated substring iterator. + std::vector<SuffixTree::RepeatedSubstring> RepeatedSubstrings; + ST.findRepeatedSubstrings(RepeatedSubstrings, 2); - // Figure out if this candidate is beneficial. - unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size(); - - // Too short to be beneficial; skip it. - // FIXME: This isn't necessarily true for, say, X86. If we factor in - // instruction lengths we need more information than this. - if (StringLen < 2) - continue; - - // If this is a beneficial class of candidate, then every one is stored in - // this vector. + for (SuffixTree::RepeatedSubstring &RS : RepeatedSubstrings) { std::vector<Candidate> CandidatesForRepeatedSeq; - - // Figure out the call overhead for each instance of the sequence. - for (auto &ChildPair : Parent.Children) { - SuffixTreeNode *M = ChildPair.second; - - if (M && M->IsInTree && M->isLeaf()) { - // Never visit this leaf again. - M->IsInTree = false; - unsigned StartIdx = M->SuffixIdx; - unsigned EndIdx = StartIdx + StringLen - 1; - - // Trick: Discard some candidates that would be incompatible with the - // ones we've already found for this sequence. This will save us some - // work in candidate selection. - // - // If two candidates overlap, then we can't outline them both. This - // happens when we have candidates that look like, say - // - // AA (where each "A" is an instruction). - // - // We might have some portion of the module that looks like this: - // AAAAAA (6 A's) - // - // In this case, there are 5 different copies of "AA" in this range, but - // at most 3 can be outlined. If only outlining 3 of these is going to - // be unbeneficial, then we ought to not bother. - // - // Note that two things DON'T overlap when they look like this: - // start1...end1 .... start2...end2 - // That is, one must either - // * End before the other starts - // * Start after the other ends - if (std::all_of(CandidatesForRepeatedSeq.begin(), - CandidatesForRepeatedSeq.end(), - [&StartIdx, &EndIdx](const Candidate &C) { - return (EndIdx < C.getStartIdx() || - StartIdx > C.getEndIdx()); - })) { - // It doesn't overlap with anything, so we can outline it. - // Each sequence is over [StartIt, EndIt]. - // Save the candidate and its location. - - MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx]; - MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; - - CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt, - EndIt, StartIt->getParent(), - FunctionList.size()); - } + unsigned StringLen = RS.Length; + for (const unsigned &StartIdx : RS.StartIndices) { + unsigned EndIdx = StartIdx + StringLen - 1; + // Trick: Discard some candidates that would be incompatible with the + // ones we've already found for this sequence. This will save us some + // work in candidate selection. + // + // If two candidates overlap, then we can't outline them both. This + // happens when we have candidates that look like, say + // + // AA (where each "A" is an instruction). + // + // We might have some portion of the module that looks like this: + // AAAAAA (6 A's) + // + // In this case, there are 5 different copies of "AA" in this range, but + // at most 3 can be outlined. If only outlining 3 of these is going to + // be unbeneficial, then we ought to not bother. + // + // Note that two things DON'T overlap when they look like this: + // start1...end1 .... start2...end2 + // That is, one must either + // * End before the other starts + // * Start after the other ends + if (std::all_of( + CandidatesForRepeatedSeq.begin(), CandidatesForRepeatedSeq.end(), + [&StartIdx, &EndIdx](const Candidate &C) { + return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx()); + })) { + // It doesn't overlap with anything, so we can outline it. + // Each sequence is over [StartIt, EndIt]. + // Save the candidate and its location. + + MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx]; + MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; + + CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt, + EndIt, StartIt->getParent(), + FunctionList.size()); } } @@ -1021,7 +1042,8 @@ unsigned MachineOutliner::findCandidates( continue; std::vector<unsigned> Seq; - for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++) + unsigned StartIdx = RS.StartIndices[0]; // Grab any start index. + for (unsigned i = StartIdx; i < StartIdx + StringLen; i++) Seq.push_back(ST.Str[i]); OF.Sequence = Seq; OF.Name = FunctionList.size(); @@ -1040,9 +1062,6 @@ unsigned MachineOutliner::findCandidates( for (std::shared_ptr<Candidate> &C : OF.Candidates) CandidateList.push_back(C); FunctionList.push_back(OF); - - // Move to the next function. - Parent.IsInTree = false; } return MaxLen; |