summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang-tools-extra/clangd/Headers.h14
-rw-r--r--clang-tools-extra/clangd/SourceCode.cpp12
-rw-r--r--clang-tools-extra/clangd/SourceCode.h9
-rw-r--r--clang-tools-extra/clangd/index/Background.cpp35
-rw-r--r--clang-tools-extra/clangd/index/Background.h5
-rw-r--r--clang-tools-extra/clangd/index/Serialization.cpp75
-rw-r--r--clang-tools-extra/clangd/index/Serialization.h10
-rw-r--r--clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp3
-rw-r--r--clang-tools-extra/unittests/clangd/SerializationTests.cpp47
9 files changed, 148 insertions, 62 deletions
diff --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h
index 519fc47060e..7eead04c649 100644
--- a/clang-tools-extra/clangd/Headers.h
+++ b/clang-tools-extra/clangd/Headers.h
@@ -48,6 +48,20 @@ struct Inclusion {
};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion&);
+// Contains information about one file in the build grpah and its direct
+// dependencies. Doesn't own the strings it references (IncludeGraph is
+// self-contained).
+struct IncludeGraphNode {
+ // True if current file is a main file rather than a header.
+ bool IsTU;
+ llvm::StringRef URI;
+ FileDigest Digest;
+ std::vector<llvm::StringRef> DirectIncludes;
+};
+// FileURI and FileInclusions are references to keys of the map containing
+// them.
+using IncludeGraph = llvm::StringMap<IncludeGraphNode>;
+
// Information captured about the inclusion graph in a translation unit.
// This includes detailed information about the direct #includes, and summary
// information about all transitive includes.
diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp
index 43122859c55..4d276ef2f23 100644
--- a/clang-tools-extra/clangd/SourceCode.cpp
+++ b/clang-tools-extra/clangd/SourceCode.cpp
@@ -227,5 +227,17 @@ bool IsRangeConsecutive(const Range &Left, const Range &Right) {
Left.end.character == Right.start.character;
}
+FileDigest digest(StringRef Content) {
+ return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
+}
+
+Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
+ bool Invalid = false;
+ StringRef Content = SM.getBufferData(FID, &Invalid);
+ if (Invalid)
+ return None;
+ return digest(Content);
+}
+
} // namespace clangd
} // namespace clang
diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h
index fd5d53b418d..769d0afc721 100644
--- a/clang-tools-extra/clangd/SourceCode.h
+++ b/clang-tools-extra/clangd/SourceCode.h
@@ -16,13 +16,22 @@
#include "Protocol.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
#include "clang/Tooling/Core/Replacement.h"
+#include "llvm/Support/SHA1.h"
namespace clang {
class SourceManager;
namespace clangd {
+// We tend to generate digests for source codes in a lot of different places.
+// This represents the type for those digests to prevent us hard coding details
+// of hashing function at every place that needs to store this information.
+using FileDigest = decltype(llvm::SHA1::hash({}));
+FileDigest digest(StringRef Content);
+Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID);
+
// Counts the number of UTF-16 code units needed to represent a string (LSP
// specifies string lengths in UTF-16 code units).
size_t lspLength(StringRef Code);
diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp
index ecd586531ff..d4ad39580d1 100644
--- a/clang-tools-extra/clangd/index/Background.cpp
+++ b/clang-tools-extra/clangd/index/Background.cpp
@@ -11,6 +11,7 @@
#include "ClangdUnit.h"
#include "Compiler.h"
#include "Logger.h"
+#include "SourceCode.h"
#include "Threading.h"
#include "Trace.h"
#include "URI.h"
@@ -149,19 +150,6 @@ void BackgroundIndex::enqueueTask(Task T) {
QueueCV.notify_all();
}
-static BackgroundIndex::FileDigest digest(StringRef Content) {
- return SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
-}
-
-static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM,
- FileID FID) {
- bool Invalid = false;
- StringRef Content = SM.getBufferData(FID, &Invalid);
- if (Invalid)
- return None;
- return digest(Content);
-}
-
// Resolves URI to file paths with cache.
class URIToFileCache {
public:
@@ -193,8 +181,7 @@ private:
};
/// Given index results from a TU, only update files in \p FilesToUpdate.
-void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
- RefSlab Refs,
+void BackgroundIndex::update(StringRef MainFile, IndexFileIn Index,
const StringMap<FileDigest> &FilesToUpdate,
BackgroundIndexStorage *IndexStorage) {
// Partition symbols/references into files.
@@ -204,7 +191,7 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
};
StringMap<File> Files;
URIToFileCache URICache(MainFile);
- for (const auto &Sym : Symbols) {
+ for (const auto &Sym : *Index.Symbols) {
if (Sym.CanonicalDeclaration) {
auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI);
if (FilesToUpdate.count(DeclPath) != 0)
@@ -222,7 +209,7 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
}
}
DenseMap<const Ref *, SymbolID> RefToIDs;
- for (const auto &SymRefs : Refs) {
+ for (const auto &SymRefs : *Index.Refs) {
for (const auto &R : SymRefs.second) {
auto Path = URICache.resolve(R.Location.FileURI);
if (FilesToUpdate.count(Path) != 0) {
@@ -250,12 +237,11 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
auto Hash = FilesToUpdate.lookup(Path);
// We need to store shards before updating the index, since the latter
// consumes slabs.
- // FIXME: Store Hash in the Shard.
if (IndexStorage) {
IndexFileOut Shard;
Shard.Symbols = SS.get();
Shard.Refs = RS.get();
- Shard.Digest = &Hash;
+
if (auto Error = IndexStorage->storeShard(Path, Shard))
elog("Failed to write background-index shard for file {0}: {1}", Path,
std::move(Error));
@@ -275,8 +261,8 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
// \p FileDigests contains file digests for the current indexed files, and all
// changed files will be added to \p FilesToUpdate.
decltype(SymbolCollector::Options::FileFilter) createFileFilter(
- const llvm::StringMap<BackgroundIndex::FileDigest> &FileDigests,
- llvm::StringMap<BackgroundIndex::FileDigest> &FilesToUpdate) {
+ const llvm::StringMap<FileDigest> &FileDigests,
+ llvm::StringMap<FileDigest> &FilesToUpdate) {
return [&FileDigests, &FilesToUpdate](const SourceManager &SM, FileID FID) {
StringRef Path;
if (const auto *F = SM.getFileEntryForID(FID))
@@ -375,8 +361,11 @@ Error BackgroundIndex::index(tooling::CompileCommand Cmd,
Symbols.size(), Refs.numRefs());
SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
- update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate,
- IndexStorage);
+ IndexFileIn Index;
+ Index.Symbols = std::move(Symbols);
+ Index.Refs = std::move(Refs);
+
+ update(AbsolutePath, std::move(Index), FilesToUpdate, IndexStorage);
{
// Make sure hash for the main file is always updated even if there is no
// index data in it.
diff --git a/clang-tools-extra/clangd/index/Background.h b/clang-tools-extra/clangd/index/Background.h
index c71e48f2427..e1eb3ccfd18 100644
--- a/clang-tools-extra/clangd/index/Background.h
+++ b/clang-tools-extra/clangd/index/Background.h
@@ -84,11 +84,10 @@ public:
LLVM_NODISCARD bool
blockUntilIdleForTest(llvm::Optional<double> TimeoutSeconds = 10);
- using FileDigest = decltype(llvm::SHA1::hash({}));
-
private:
/// Given index results from a TU, only update files in \p FilesToUpdate.
- void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs,
+ /// Also stores new index information on IndexStorage.
+ void update(llvm::StringRef MainFile, IndexFileIn Index,
const llvm::StringMap<FileDigest> &FilesToUpdate,
BackgroundIndexStorage *IndexStorage);
diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp
index 9eb4d21eb8f..1d17a42c486 100644
--- a/clang-tools-extra/clangd/index/Serialization.cpp
+++ b/clang-tools-extra/clangd/index/Serialization.cpp
@@ -247,6 +247,31 @@ SymbolLocation readLocation(Reader &Data, ArrayRef<StringRef> Strings) {
return Loc;
}
+IncludeGraphNode readIncludeGraphNode(Reader &Data,
+ llvm::ArrayRef<llvm::StringRef> Strings) {
+ IncludeGraphNode IGN;
+ IGN.IsTU = Data.consume8();
+ IGN.URI = Data.consumeString(Strings);
+ llvm::StringRef Digest = Data.consume(IGN.Digest.size());
+ std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
+ IGN.DirectIncludes.resize(Data.consumeVar());
+ for (llvm::StringRef &Include : IGN.DirectIncludes)
+ Include = Data.consumeString(Strings);
+ return IGN;
+}
+
+void writeIncludeGraphNode(const IncludeGraphNode &IGN,
+ const StringTableOut &Strings, raw_ostream &OS) {
+ OS.write(IGN.IsTU);
+ writeVar(Strings.index(IGN.URI), OS);
+ llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
+ IGN.Digest.size());
+ OS << Hash;
+ writeVar(IGN.DirectIncludes.size(), OS);
+ for (llvm::StringRef Include : IGN.DirectIncludes)
+ writeVar(Strings.index(Include), OS);
+}
+
void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
raw_ostream &OS) {
OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
@@ -333,7 +358,7 @@ std::pair<SymbolID, std::vector<Ref>> readRefs(Reader &Data,
// A file is a RIFF chunk with type 'CdIx'.
// It contains the sections:
// - meta: version number
-// - srcs: checksum of the source file
+// - srcs: information related to include graph
// - stri: string table
// - symb: symbols
// - refs: references to symbols
@@ -367,10 +392,20 @@ Expected<IndexFileIn> readRIFF(StringRef Data) {
IndexFileIn Result;
if (Chunks.count("srcs")) {
- Reader Hash(Chunks.lookup("srcs"));
- Result.Digest.emplace();
- llvm::StringRef Digest = Hash.consume(Result.Digest->size());
- std::copy(Digest.bytes_begin(), Digest.bytes_end(), Result.Digest->begin());
+ Reader SrcsReader(Chunks.lookup("srcs"));
+ Result.Sources.emplace();
+ while (!SrcsReader.eof()) {
+ auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
+ auto Entry = Result.Sources->try_emplace(IGN.URI).first;
+ Entry->getValue() = std::move(IGN);
+ // We change all the strings inside the structure to point at the keys in
+ // the map, since it is the only copy of the string that's going to live.
+ Entry->getValue().URI = Entry->getKey();
+ for (auto &Include : Entry->getValue().DirectIncludes)
+ Include = Result.Sources->try_emplace(Include).first->getKey();
+ }
+ if (SrcsReader.err())
+ return makeError("malformed or truncated include uri");
}
if (Chunks.count("symb")) {
@@ -397,6 +432,13 @@ Expected<IndexFileIn> readRIFF(StringRef Data) {
return std::move(Result);
}
+template <class Callback>
+void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
+ CB(IGN.URI);
+ for (llvm::StringRef &Include : IGN.DirectIncludes)
+ CB(Include);
+}
+
void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
assert(Data.Symbols && "An index file without symbols makes no sense!");
riff::File RIFF;
@@ -409,18 +451,19 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
}
RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
- if (Data.Digest) {
- llvm::StringRef Hash(reinterpret_cast<const char *>(Data.Digest->data()),
- Data.Digest->size());
- RIFF.Chunks.push_back({riff::fourCC("srcs"), Hash});
- }
-
StringTableOut Strings;
std::vector<Symbol> Symbols;
for (const auto &Sym : *Data.Symbols) {
Symbols.emplace_back(Sym);
visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); });
}
+ std::vector<IncludeGraphNode> Sources;
+ if (Data.Sources)
+ for (const auto &Source : *Data.Sources) {
+ Sources.push_back(Source.getValue());
+ visitStrings(Sources.back(), [&](StringRef &S) { Strings.intern(S); });
+ }
+
std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
if (Data.Refs) {
for (const auto &Sym : *Data.Refs) {
@@ -458,6 +501,16 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
}
+ std::string SrcsSection;
+ {
+ {
+ raw_string_ostream SrcsOS(SrcsSection);
+ for (const auto &SF : Sources)
+ writeIncludeGraphNode(SF, Strings, SrcsOS);
+ }
+ RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
+ }
+
OS << RIFF;
}
diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h
index add8110a161..c403b95798e 100644
--- a/clang-tools-extra/clangd/index/Serialization.h
+++ b/clang-tools-extra/clangd/index/Serialization.h
@@ -24,6 +24,7 @@
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
+#include "Headers.h"
#include "Index.h"
#include "llvm/Support/Error.h"
@@ -37,11 +38,10 @@ enum class IndexFileFormat {
// Holds the contents of an index file that was read.
struct IndexFileIn {
- using FileDigest = std::array<uint8_t, 20>;
llvm::Optional<SymbolSlab> Symbols;
llvm::Optional<RefSlab> Refs;
- // Digest of the source file that generated the contents.
- llvm::Optional<FileDigest> Digest;
+ // Keys are URIs of the source files.
+ llvm::Optional<IncludeGraph> Sources;
};
// Parse an index file. The input must be a RIFF or YAML file.
llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
@@ -50,8 +50,8 @@ llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
struct IndexFileOut {
const SymbolSlab *Symbols = nullptr;
const RefSlab *Refs = nullptr;
- // Digest of the source file that generated the contents.
- const IndexFileIn::FileDigest *Digest = nullptr;
+ // Keys are URIs of the source files.
+ const IncludeGraph *Sources = nullptr;
// TODO: Support serializing Dex posting lists.
IndexFileFormat Format = IndexFileFormat::RIFF;
diff --git a/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp b/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
index 0beaf6a730d..19263d0165a 100644
--- a/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
+++ b/clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp
@@ -129,8 +129,6 @@ TEST_F(BackgroundIndexTest, ShardStorageWriteTest) {
)cpp";
std::string A_CC = "#include \"A.h\"\nvoid g() { (void)common; }";
FS.Files[testPath("root/A.cc")] = A_CC;
- auto Digest = llvm::SHA1::hash(
- {reinterpret_cast<const uint8_t *>(A_CC.data()), A_CC.size()});
llvm::StringMap<std::string> Storage;
size_t CacheHits = 0;
@@ -165,7 +163,6 @@ TEST_F(BackgroundIndexTest, ShardStorageWriteTest) {
EXPECT_NE(ShardSource, nullptr);
EXPECT_THAT(*ShardSource->Symbols, UnorderedElementsAre());
EXPECT_THAT(*ShardSource->Refs, RefsAre({FileURI("unittest:///root/A.cc")}));
- EXPECT_EQ(*ShardSource->Digest, Digest);
}
} // namespace clangd
diff --git a/clang-tools-extra/unittests/clangd/SerializationTests.cpp b/clang-tools-extra/unittests/clangd/SerializationTests.cpp
index cf1bc428190..a30369b0f77 100644
--- a/clang-tools-extra/unittests/clangd/SerializationTests.cpp
+++ b/clang-tools-extra/unittests/clangd/SerializationTests.cpp
@@ -173,31 +173,44 @@ TEST(SerializationTest, BinaryConversions) {
UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
}
-TEST(SerializationTest, HashTest) {
+TEST(SerializationTest, SrcsTest) {
auto In = readIndexFile(YAML);
EXPECT_TRUE(bool(In)) << In.takeError();
- std::string TestContent("TESTCONTENT");
- auto Digest =
+ std::string TestContent("TestContent");
+ IncludeGraphNode IGN;
+ IGN.Digest =
llvm::SHA1::hash({reinterpret_cast<const uint8_t *>(TestContent.data()),
TestContent.size()});
+ IGN.DirectIncludes = {"inc1", "inc2"};
+ IGN.URI = "URI";
+ IGN.IsTU = true;
+ IncludeGraph Sources;
+ Sources[IGN.URI] = IGN;
// Write to binary format, and parse again.
IndexFileOut Out(*In);
Out.Format = IndexFileFormat::RIFF;
- Out.Digest = &Digest;
- std::string Serialized = to_string(Out);
-
- auto In2 = readIndexFile(Serialized);
- ASSERT_TRUE(bool(In2)) << In.takeError();
- ASSERT_EQ(In2->Digest, Digest);
- ASSERT_TRUE(In2->Symbols);
- ASSERT_TRUE(In2->Refs);
-
- // Assert the YAML serializations match, for nice comparisons and diffs.
- EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
- UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
- EXPECT_THAT(YAMLFromRefs(*In2->Refs),
- UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
+ Out.Sources = &Sources;
+ {
+ std::string Serialized = to_string(Out);
+
+ auto In = readIndexFile(Serialized);
+ ASSERT_TRUE(bool(In)) << In.takeError();
+ ASSERT_TRUE(In->Symbols);
+ ASSERT_TRUE(In->Refs);
+ ASSERT_TRUE(In->Sources);
+ ASSERT_TRUE(In->Sources->count(IGN.URI));
+ // Assert the YAML serializations match, for nice comparisons and diffs.
+ EXPECT_THAT(YAMLFromSymbols(*In->Symbols),
+ UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
+ EXPECT_THAT(YAMLFromRefs(*In->Refs),
+ UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
+ auto IGNDeserialized = In->Sources->lookup(IGN.URI);
+ EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest);
+ EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes);
+ EXPECT_EQ(IGNDeserialized.URI, IGN.URI);
+ EXPECT_EQ(IGNDeserialized.IsTU, IGN.IsTU);
+ }
}
} // namespace
OpenPOWER on IntegriCloud