diff options
author | Graydon Hoare <ghoare@apple.com> | 2018-04-07 00:44:02 +0000 |
---|---|---|
committer | Graydon Hoare <ghoare@apple.com> | 2018-04-07 00:44:02 +0000 |
commit | 54fe208a5f366d776511fe99fef857490eb4426f (patch) | |
tree | 3f47544e006348a0c326107a052c9db233f62425 /llvm/lib/Support/SourceMgr.cpp | |
parent | 8a5ea618866ab00e1e4fd7d3083871cbd742046a (diff) | |
download | bcm5719-llvm-54fe208a5f366d776511fe99fef857490eb4426f.tar.gz bcm5719-llvm-54fe208a5f366d776511fe99fef857490eb4426f.zip |
[Support] Make line-number cache robust against access patterns.
Summary:
The LLVM SourceMgr class (which is used indirectly by Swift, though not Clang)
has a routine for looking up line numbers of SMLocs. This routine uses a
shared, special-purpose cache that handles exactly one access pattern
efficiently: looking up the line number of an SMLoc that points into the same
buffer as the last query made to the SourceMgr, at a location in the buffer at
or ahead of the last query.
When this works it's fine, but when it fails it's catastrophic for performancer:
one recent out-of-order access from a Swift utility routine ran for tens of
seconds, spending 99% of its time repeatedly scanning buffers for '\n'.
This change removes the shared cache from the SourceMgr and installs a new
cache in each SrcBuffer. The per-SrcBuffer caches are also "full", in the sense
that rather than caching a single last-query pointer, they cache _all_ the
line-ending offsets, in a binary-searchable array, such that once it's
populated (on first access), all subsequent access patterns run at the same
speed.
Performance measurements I've done show this is actually a little bit faster on
real codebases (though only a couple fractions of a percent). Memory usage is
up by a few tens to hundreds of bytes per SrcBuffer that has a line lookup done
on it; I've attempted to minimize this by using dynamic selection of integer
sized when storing offset arrays. But the main motive here is to
make-impossible the cases we don't always see, that show up by surprise when
there is an out-of-order access pattern.
Reviewers: jordan_rose
Reviewed By: jordan_rose
Subscribers: probinson, llvm-commits
Differential Revision: https://reviews.llvm.org/D45003
llvm-svn: 329470
Diffstat (limited to 'llvm/lib/Support/SourceMgr.cpp')
-rw-r--r-- | llvm/lib/Support/SourceMgr.cpp | 125 |
1 files changed, 74 insertions, 51 deletions
diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp index a8f6208a558..089fcdb59d9 100644 --- a/llvm/lib/Support/SourceMgr.cpp +++ b/llvm/lib/Support/SourceMgr.cpp @@ -28,6 +28,7 @@ #include <algorithm> #include <cassert> #include <cstddef> +#include <limits> #include <memory> #include <string> #include <utility> @@ -36,24 +37,6 @@ using namespace llvm; static const size_t TabStop = 8; -namespace { - - struct LineNoCacheTy { - const char *LastQuery; - unsigned LastQueryBufferID; - unsigned LineNoOfQuery; - }; - -} // end anonymous namespace - -static LineNoCacheTy *getCache(void *Ptr) { - return (LineNoCacheTy*)Ptr; -} - -SourceMgr::~SourceMgr() { - delete getCache(LineNoCache); -} - unsigned SourceMgr::AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc, std::string &IncludedFile) { @@ -85,46 +68,86 @@ unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { return 0; } -std::pair<unsigned, unsigned> -SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const { - if (!BufferID) - BufferID = FindBufferContainingLoc(Loc); - assert(BufferID && "Invalid Location!"); +template <typename T> +unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const { + + // Ensure OffsetCache is allocated and populated with offsets of all the + // '\n' bytes. + std::vector<T> *Offsets = nullptr; + if (OffsetCache.isNull()) { + Offsets = new std::vector<T>(); + OffsetCache = Offsets; + size_t Sz = Buffer->getBufferSize(); + assert(Sz <= std::numeric_limits<T>::max()); + StringRef S = Buffer->getBuffer(); + for (size_t N = 0; N < Sz; ++N) { + if (S[N] == '\n') { + Offsets->push_back(static_cast<T>(N)); + } + } + } else { + Offsets = OffsetCache.get<std::vector<T> *>(); + } - const MemoryBuffer *Buff = getMemoryBuffer(BufferID); + const char *BufStart = Buffer->getBufferStart(); + assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd()); + ptrdiff_t PtrDiff = Ptr - BufStart; + assert(PtrDiff >= 0 && static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max()); + T PtrOffset = static_cast<T>(PtrDiff); - // Count the number of \n's between the start of the file and the specified - // location. - unsigned LineNo = 1; + // std::lower_bound returns the first EOL offset that's not-less-than + // PtrOffset, meaning the EOL that _ends the line_ that PtrOffset is on + // (including if PtrOffset refers to the EOL itself). If there's no such + // EOL, returns end(). + auto EOL = std::lower_bound(Offsets->begin(), Offsets->end(), PtrOffset); - const char *BufStart = Buff->getBufferStart(); - const char *Ptr = BufStart; + // Lines count from 1, so add 1 to the distance from the 0th line. + return (1 + (EOL - Offsets->begin())); +} - // If we have a line number cache, and if the query is to a later point in the - // same file, start searching from the last query location. This optimizes - // for the case when multiple diagnostics come out of one file in order. - if (LineNoCacheTy *Cache = getCache(LineNoCache)) - if (Cache->LastQueryBufferID == BufferID && - Cache->LastQuery <= Loc.getPointer()) { - Ptr = Cache->LastQuery; - LineNo = Cache->LineNoOfQuery; - } +SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other) + : Buffer(std::move(Other.Buffer)), + OffsetCache(Other.OffsetCache), + IncludeLoc(Other.IncludeLoc) { + Other.OffsetCache = nullptr; +} - // Scan for the location being queried, keeping track of the number of lines - // we see. - for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr) - if (*Ptr == '\n') ++LineNo; +SourceMgr::SrcBuffer::~SrcBuffer() { + if (!OffsetCache.isNull()) { + if (OffsetCache.is<std::vector<uint8_t>*>()) + delete OffsetCache.get<std::vector<uint8_t>*>(); + else if (OffsetCache.is<std::vector<uint16_t>*>()) + delete OffsetCache.get<std::vector<uint16_t>*>(); + else if (OffsetCache.is<std::vector<uint32_t>*>()) + delete OffsetCache.get<std::vector<uint32_t>*>(); + else + delete OffsetCache.get<std::vector<uint64_t>*>(); + OffsetCache = nullptr; + } +} - // Allocate the line number cache if it doesn't exist. - if (!LineNoCache) - LineNoCache = new LineNoCacheTy(); +std::pair<unsigned, unsigned> +SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const { + if (!BufferID) + BufferID = FindBufferContainingLoc(Loc); + assert(BufferID && "Invalid Location!"); - // Update the line # cache. - LineNoCacheTy &Cache = *getCache(LineNoCache); - Cache.LastQueryBufferID = BufferID; - Cache.LastQuery = Ptr; - Cache.LineNoOfQuery = LineNo; - + auto &SB = getBufferInfo(BufferID); + const char *Ptr = Loc.getPointer(); + + size_t Sz = SB.Buffer->getBufferSize(); + assert(Sz <= std::numeric_limits<uint64_t>::max()); + unsigned LineNo; + if (Sz <= std::numeric_limits<uint8_t>::max()) + LineNo = SB.getLineNumber<uint8_t>(Ptr); + else if (Sz <= std::numeric_limits<uint16_t>::max()) + LineNo = SB.getLineNumber<uint16_t>(Ptr); + else if (Sz <= std::numeric_limits<uint32_t>::max()) + LineNo = SB.getLineNumber<uint32_t>(Ptr); + else + LineNo = SB.getLineNumber<uint64_t>(Ptr); + + const char *BufStart = SB.Buffer->getBufferStart(); size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r"); if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0; return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs); |