summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Support/SourceMgr.cpp
diff options
context:
space:
mode:
authorGraydon Hoare <ghoare@apple.com>2018-04-07 00:44:02 +0000
committerGraydon Hoare <ghoare@apple.com>2018-04-07 00:44:02 +0000
commit54fe208a5f366d776511fe99fef857490eb4426f (patch)
tree3f47544e006348a0c326107a052c9db233f62425 /llvm/lib/Support/SourceMgr.cpp
parent8a5ea618866ab00e1e4fd7d3083871cbd742046a (diff)
downloadbcm5719-llvm-54fe208a5f366d776511fe99fef857490eb4426f.tar.gz
bcm5719-llvm-54fe208a5f366d776511fe99fef857490eb4426f.zip
[Support] Make line-number cache robust against access patterns.
Summary: The LLVM SourceMgr class (which is used indirectly by Swift, though not Clang) has a routine for looking up line numbers of SMLocs. This routine uses a shared, special-purpose cache that handles exactly one access pattern efficiently: looking up the line number of an SMLoc that points into the same buffer as the last query made to the SourceMgr, at a location in the buffer at or ahead of the last query. When this works it's fine, but when it fails it's catastrophic for performancer: one recent out-of-order access from a Swift utility routine ran for tens of seconds, spending 99% of its time repeatedly scanning buffers for '\n'. This change removes the shared cache from the SourceMgr and installs a new cache in each SrcBuffer. The per-SrcBuffer caches are also "full", in the sense that rather than caching a single last-query pointer, they cache _all_ the line-ending offsets, in a binary-searchable array, such that once it's populated (on first access), all subsequent access patterns run at the same speed. Performance measurements I've done show this is actually a little bit faster on real codebases (though only a couple fractions of a percent). Memory usage is up by a few tens to hundreds of bytes per SrcBuffer that has a line lookup done on it; I've attempted to minimize this by using dynamic selection of integer sized when storing offset arrays. But the main motive here is to make-impossible the cases we don't always see, that show up by surprise when there is an out-of-order access pattern. Reviewers: jordan_rose Reviewed By: jordan_rose Subscribers: probinson, llvm-commits Differential Revision: https://reviews.llvm.org/D45003 llvm-svn: 329470
Diffstat (limited to 'llvm/lib/Support/SourceMgr.cpp')
-rw-r--r--llvm/lib/Support/SourceMgr.cpp125
1 files changed, 74 insertions, 51 deletions
diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp
index a8f6208a558..089fcdb59d9 100644
--- a/llvm/lib/Support/SourceMgr.cpp
+++ b/llvm/lib/Support/SourceMgr.cpp
@@ -28,6 +28,7 @@
#include <algorithm>
#include <cassert>
#include <cstddef>
+#include <limits>
#include <memory>
#include <string>
#include <utility>
@@ -36,24 +37,6 @@ using namespace llvm;
static const size_t TabStop = 8;
-namespace {
-
- struct LineNoCacheTy {
- const char *LastQuery;
- unsigned LastQueryBufferID;
- unsigned LineNoOfQuery;
- };
-
-} // end anonymous namespace
-
-static LineNoCacheTy *getCache(void *Ptr) {
- return (LineNoCacheTy*)Ptr;
-}
-
-SourceMgr::~SourceMgr() {
- delete getCache(LineNoCache);
-}
-
unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
SMLoc IncludeLoc,
std::string &IncludedFile) {
@@ -85,46 +68,86 @@ unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
return 0;
}
-std::pair<unsigned, unsigned>
-SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
- if (!BufferID)
- BufferID = FindBufferContainingLoc(Loc);
- assert(BufferID && "Invalid Location!");
+template <typename T>
+unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
+
+ // Ensure OffsetCache is allocated and populated with offsets of all the
+ // '\n' bytes.
+ std::vector<T> *Offsets = nullptr;
+ if (OffsetCache.isNull()) {
+ Offsets = new std::vector<T>();
+ OffsetCache = Offsets;
+ size_t Sz = Buffer->getBufferSize();
+ assert(Sz <= std::numeric_limits<T>::max());
+ StringRef S = Buffer->getBuffer();
+ for (size_t N = 0; N < Sz; ++N) {
+ if (S[N] == '\n') {
+ Offsets->push_back(static_cast<T>(N));
+ }
+ }
+ } else {
+ Offsets = OffsetCache.get<std::vector<T> *>();
+ }
- const MemoryBuffer *Buff = getMemoryBuffer(BufferID);
+ const char *BufStart = Buffer->getBufferStart();
+ assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
+ ptrdiff_t PtrDiff = Ptr - BufStart;
+ assert(PtrDiff >= 0 && static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
+ T PtrOffset = static_cast<T>(PtrDiff);
- // Count the number of \n's between the start of the file and the specified
- // location.
- unsigned LineNo = 1;
+ // std::lower_bound returns the first EOL offset that's not-less-than
+ // PtrOffset, meaning the EOL that _ends the line_ that PtrOffset is on
+ // (including if PtrOffset refers to the EOL itself). If there's no such
+ // EOL, returns end().
+ auto EOL = std::lower_bound(Offsets->begin(), Offsets->end(), PtrOffset);
- const char *BufStart = Buff->getBufferStart();
- const char *Ptr = BufStart;
+ // Lines count from 1, so add 1 to the distance from the 0th line.
+ return (1 + (EOL - Offsets->begin()));
+}
- // If we have a line number cache, and if the query is to a later point in the
- // same file, start searching from the last query location. This optimizes
- // for the case when multiple diagnostics come out of one file in order.
- if (LineNoCacheTy *Cache = getCache(LineNoCache))
- if (Cache->LastQueryBufferID == BufferID &&
- Cache->LastQuery <= Loc.getPointer()) {
- Ptr = Cache->LastQuery;
- LineNo = Cache->LineNoOfQuery;
- }
+SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
+ : Buffer(std::move(Other.Buffer)),
+ OffsetCache(Other.OffsetCache),
+ IncludeLoc(Other.IncludeLoc) {
+ Other.OffsetCache = nullptr;
+}
- // Scan for the location being queried, keeping track of the number of lines
- // we see.
- for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr)
- if (*Ptr == '\n') ++LineNo;
+SourceMgr::SrcBuffer::~SrcBuffer() {
+ if (!OffsetCache.isNull()) {
+ if (OffsetCache.is<std::vector<uint8_t>*>())
+ delete OffsetCache.get<std::vector<uint8_t>*>();
+ else if (OffsetCache.is<std::vector<uint16_t>*>())
+ delete OffsetCache.get<std::vector<uint16_t>*>();
+ else if (OffsetCache.is<std::vector<uint32_t>*>())
+ delete OffsetCache.get<std::vector<uint32_t>*>();
+ else
+ delete OffsetCache.get<std::vector<uint64_t>*>();
+ OffsetCache = nullptr;
+ }
+}
- // Allocate the line number cache if it doesn't exist.
- if (!LineNoCache)
- LineNoCache = new LineNoCacheTy();
+std::pair<unsigned, unsigned>
+SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
+ if (!BufferID)
+ BufferID = FindBufferContainingLoc(Loc);
+ assert(BufferID && "Invalid Location!");
- // Update the line # cache.
- LineNoCacheTy &Cache = *getCache(LineNoCache);
- Cache.LastQueryBufferID = BufferID;
- Cache.LastQuery = Ptr;
- Cache.LineNoOfQuery = LineNo;
-
+ auto &SB = getBufferInfo(BufferID);
+ const char *Ptr = Loc.getPointer();
+
+ size_t Sz = SB.Buffer->getBufferSize();
+ assert(Sz <= std::numeric_limits<uint64_t>::max());
+ unsigned LineNo;
+ if (Sz <= std::numeric_limits<uint8_t>::max())
+ LineNo = SB.getLineNumber<uint8_t>(Ptr);
+ else if (Sz <= std::numeric_limits<uint16_t>::max())
+ LineNo = SB.getLineNumber<uint16_t>(Ptr);
+ else if (Sz <= std::numeric_limits<uint32_t>::max())
+ LineNo = SB.getLineNumber<uint32_t>(Ptr);
+ else
+ LineNo = SB.getLineNumber<uint64_t>(Ptr);
+
+ const char *BufStart = SB.Buffer->getBufferStart();
size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r");
if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0;
return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs);
OpenPOWER on IntegriCloud