From 54fe208a5f366d776511fe99fef857490eb4426f Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Sat, 7 Apr 2018 00:44:02 +0000 Subject: [Support] Make line-number cache robust against access patterns. Summary: The LLVM SourceMgr class (which is used indirectly by Swift, though not Clang) has a routine for looking up line numbers of SMLocs. This routine uses a shared, special-purpose cache that handles exactly one access pattern efficiently: looking up the line number of an SMLoc that points into the same buffer as the last query made to the SourceMgr, at a location in the buffer at or ahead of the last query. When this works it's fine, but when it fails it's catastrophic for performancer: one recent out-of-order access from a Swift utility routine ran for tens of seconds, spending 99% of its time repeatedly scanning buffers for '\n'. This change removes the shared cache from the SourceMgr and installs a new cache in each SrcBuffer. The per-SrcBuffer caches are also "full", in the sense that rather than caching a single last-query pointer, they cache _all_ the line-ending offsets, in a binary-searchable array, such that once it's populated (on first access), all subsequent access patterns run at the same speed. Performance measurements I've done show this is actually a little bit faster on real codebases (though only a couple fractions of a percent). Memory usage is up by a few tens to hundreds of bytes per SrcBuffer that has a line lookup done on it; I've attempted to minimize this by using dynamic selection of integer sized when storing offset arrays. But the main motive here is to make-impossible the cases we don't always see, that show up by surprise when there is an out-of-order access pattern. Reviewers: jordan_rose Reviewed By: jordan_rose Subscribers: probinson, llvm-commits Differential Revision: https://reviews.llvm.org/D45003 llvm-svn: 329470 --- llvm/lib/Support/SourceMgr.cpp | 125 ++++++++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 51 deletions(-) (limited to 'llvm/lib/Support') diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp index a8f6208a558..089fcdb59d9 100644 --- a/llvm/lib/Support/SourceMgr.cpp +++ b/llvm/lib/Support/SourceMgr.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -36,24 +37,6 @@ using namespace llvm; static const size_t TabStop = 8; -namespace { - - struct LineNoCacheTy { - const char *LastQuery; - unsigned LastQueryBufferID; - unsigned LineNoOfQuery; - }; - -} // end anonymous namespace - -static LineNoCacheTy *getCache(void *Ptr) { - return (LineNoCacheTy*)Ptr; -} - -SourceMgr::~SourceMgr() { - delete getCache(LineNoCache); -} - unsigned SourceMgr::AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc, std::string &IncludedFile) { @@ -85,46 +68,86 @@ unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { return 0; } -std::pair -SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const { - if (!BufferID) - BufferID = FindBufferContainingLoc(Loc); - assert(BufferID && "Invalid Location!"); +template +unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const { + + // Ensure OffsetCache is allocated and populated with offsets of all the + // '\n' bytes. + std::vector *Offsets = nullptr; + if (OffsetCache.isNull()) { + Offsets = new std::vector(); + OffsetCache = Offsets; + size_t Sz = Buffer->getBufferSize(); + assert(Sz <= std::numeric_limits::max()); + StringRef S = Buffer->getBuffer(); + for (size_t N = 0; N < Sz; ++N) { + if (S[N] == '\n') { + Offsets->push_back(static_cast(N)); + } + } + } else { + Offsets = OffsetCache.get *>(); + } - const MemoryBuffer *Buff = getMemoryBuffer(BufferID); + const char *BufStart = Buffer->getBufferStart(); + assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd()); + ptrdiff_t PtrDiff = Ptr - BufStart; + assert(PtrDiff >= 0 && static_cast(PtrDiff) <= std::numeric_limits::max()); + T PtrOffset = static_cast(PtrDiff); - // Count the number of \n's between the start of the file and the specified - // location. - unsigned LineNo = 1; + // std::lower_bound returns the first EOL offset that's not-less-than + // PtrOffset, meaning the EOL that _ends the line_ that PtrOffset is on + // (including if PtrOffset refers to the EOL itself). If there's no such + // EOL, returns end(). + auto EOL = std::lower_bound(Offsets->begin(), Offsets->end(), PtrOffset); - const char *BufStart = Buff->getBufferStart(); - const char *Ptr = BufStart; + // Lines count from 1, so add 1 to the distance from the 0th line. + return (1 + (EOL - Offsets->begin())); +} - // If we have a line number cache, and if the query is to a later point in the - // same file, start searching from the last query location. This optimizes - // for the case when multiple diagnostics come out of one file in order. - if (LineNoCacheTy *Cache = getCache(LineNoCache)) - if (Cache->LastQueryBufferID == BufferID && - Cache->LastQuery <= Loc.getPointer()) { - Ptr = Cache->LastQuery; - LineNo = Cache->LineNoOfQuery; - } +SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other) + : Buffer(std::move(Other.Buffer)), + OffsetCache(Other.OffsetCache), + IncludeLoc(Other.IncludeLoc) { + Other.OffsetCache = nullptr; +} - // Scan for the location being queried, keeping track of the number of lines - // we see. - for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr) - if (*Ptr == '\n') ++LineNo; +SourceMgr::SrcBuffer::~SrcBuffer() { + if (!OffsetCache.isNull()) { + if (OffsetCache.is*>()) + delete OffsetCache.get*>(); + else if (OffsetCache.is*>()) + delete OffsetCache.get*>(); + else if (OffsetCache.is*>()) + delete OffsetCache.get*>(); + else + delete OffsetCache.get*>(); + OffsetCache = nullptr; + } +} - // Allocate the line number cache if it doesn't exist. - if (!LineNoCache) - LineNoCache = new LineNoCacheTy(); +std::pair +SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const { + if (!BufferID) + BufferID = FindBufferContainingLoc(Loc); + assert(BufferID && "Invalid Location!"); - // Update the line # cache. - LineNoCacheTy &Cache = *getCache(LineNoCache); - Cache.LastQueryBufferID = BufferID; - Cache.LastQuery = Ptr; - Cache.LineNoOfQuery = LineNo; - + auto &SB = getBufferInfo(BufferID); + const char *Ptr = Loc.getPointer(); + + size_t Sz = SB.Buffer->getBufferSize(); + assert(Sz <= std::numeric_limits::max()); + unsigned LineNo; + if (Sz <= std::numeric_limits::max()) + LineNo = SB.getLineNumber(Ptr); + else if (Sz <= std::numeric_limits::max()) + LineNo = SB.getLineNumber(Ptr); + else if (Sz <= std::numeric_limits::max()) + LineNo = SB.getLineNumber(Ptr); + else + LineNo = SB.getLineNumber(Ptr); + + const char *BufStart = SB.Buffer->getBufferStart(); size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r"); if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0; return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs); -- cgit v1.2.3