diff options
author | Sam McCall <sam.mccall@gmail.com> | 2018-09-04 16:16:50 +0000 |
---|---|---|
committer | Sam McCall <sam.mccall@gmail.com> | 2018-09-04 16:16:50 +0000 |
commit | 50f3631057f717448ba34b4175daaa81215fbd5e (patch) | |
tree | 918408ccfd12bfc7187889ec77d341d17ae386a7 /clang-tools-extra/unittests/clangd/SerializationTests.cpp | |
parent | cc8b507a60677b79fe180681834929e4764e6ece (diff) | |
download | bcm5719-llvm-50f3631057f717448ba34b4175daaa81215fbd5e.tar.gz bcm5719-llvm-50f3631057f717448ba34b4175daaa81215fbd5e.zip |
[clangd] Define a compact binary serialization format for symbol slab/index.
Summary:
This is intended to replace the current YAML format for general use.
It's ~10x more compact than YAML, and ~40% more compact than gzipped YAML:
llvmidx.riff = 20M, llvmidx.yaml = 272M, llvmidx.yaml.gz = 32M
It's also simpler/faster to read and write.
The format is a RIFF container (chunks of (type, size, data)) with:
- a compressed string table
- simple binary encoding of symbols (with varints for compactness)
It can be extended to include occurrences, Dex posting lists, etc.
There's no rich backwards-compatibility scheme, but a version number is included
so we can detect incompatible files and do ad-hoc back-compat.
Alternatives considered:
- compressed YAML or JSON: bulky and slow to load
- llvm bitstream: the model is confusing and the libraries are hard to use. My attempt
produced slightly larger files, and the code was longer and slower.
- protobuf or similar: would be really nice (esp for back-compat) but the
dependency is a big hassle
- ad-hoc binary format without a container: it seems clear we're going
to add posting lists and occurrences here, and that they will benefit
from sharing a string table. The container makes it easy to debug
these pieces in isolation, and to make them optional.
Reviewers: ioeric
Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, mgrang, arphaman, kadircet, cfe-commits
Differential Revision: https://reviews.llvm.org/D51585
llvm-svn: 341375
Diffstat (limited to 'clang-tools-extra/unittests/clangd/SerializationTests.cpp')
-rw-r--r-- | clang-tools-extra/unittests/clangd/SerializationTests.cpp | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/clang-tools-extra/unittests/clangd/SerializationTests.cpp b/clang-tools-extra/unittests/clangd/SerializationTests.cpp new file mode 100644 index 00000000000..cc430963ca1 --- /dev/null +++ b/clang-tools-extra/unittests/clangd/SerializationTests.cpp @@ -0,0 +1,138 @@ +//===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "index/Serialization.h" +#include "index/SymbolYAML.h" +#include "llvm/Support/ScopedPrinter.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using testing::UnorderedElementsAre; +using testing::UnorderedElementsAreArray; +namespace clang { +namespace clangd { +namespace { + +const char *YAML1 = R"( +--- +ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 +Name: 'Foo1' +Scope: 'clang::' +SymInfo: + Kind: Function + Lang: Cpp +CanonicalDeclaration: + FileURI: file:///path/foo.h + Start: + Line: 1 + Column: 0 + End: + Line: 1 + Column: 1 +IsIndexedForCodeCompletion: true +Documentation: 'Foo doc' +ReturnType: 'int' +IncludeHeaders: + - Header: 'include1' + References: 7 + - Header: 'include2' + References: 3 +... +)"; + +const char *YAML2 = R"( +--- +ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858 +Name: 'Foo2' +Scope: 'clang::' +SymInfo: + Kind: Function + Lang: Cpp +CanonicalDeclaration: + FileURI: file:///path/bar.h + Start: + Line: 1 + Column: 0 + End: + Line: 1 + Column: 1 +IsIndexedForCodeCompletion: false +Signature: '-sig' +CompletionSnippetSuffix: '-snippet' +... 
+)"; + +MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; } +MATCHER_P2(IncludeHeaderWithRef, IncludeHeader, References, "") { + return (arg.IncludeHeader == IncludeHeader) && (arg.References == References); +} + +TEST(SerializationTest, YAMLConversions) { + auto Symbols1 = symbolsFromYAML(YAML1); + ASSERT_EQ(Symbols1.size(), 1u); + const auto &Sym1 = *Symbols1.begin(); + EXPECT_THAT(Sym1, QName("clang::Foo1")); + EXPECT_EQ(Sym1.Signature, ""); + EXPECT_EQ(Sym1.Documentation, "Foo doc"); + EXPECT_EQ(Sym1.ReturnType, "int"); + EXPECT_EQ(Sym1.CanonicalDeclaration.FileURI, "file:///path/foo.h"); + EXPECT_TRUE(Sym1.IsIndexedForCodeCompletion); + EXPECT_THAT(Sym1.IncludeHeaders, + UnorderedElementsAre(IncludeHeaderWithRef("include1", 7u), + IncludeHeaderWithRef("include2", 3u))); + + auto Symbols2 = symbolsFromYAML(YAML2); + ASSERT_EQ(Symbols2.size(), 1u); + const auto &Sym2 = *Symbols2.begin(); + EXPECT_THAT(Sym2, QName("clang::Foo2")); + EXPECT_EQ(Sym2.Signature, "-sig"); + EXPECT_EQ(Sym2.ReturnType, ""); + EXPECT_EQ(Sym2.CanonicalDeclaration.FileURI, "file:///path/bar.h"); + EXPECT_FALSE(Sym2.IsIndexedForCodeCompletion); + + std::string ConcatenatedYAML; + { + llvm::raw_string_ostream OS(ConcatenatedYAML); + SymbolsToYAML(Symbols1, OS); + SymbolsToYAML(Symbols2, OS); + } + auto ConcatenatedSymbols = symbolsFromYAML(ConcatenatedYAML); + EXPECT_THAT(ConcatenatedSymbols, + UnorderedElementsAre(QName("clang::Foo1"), QName("clang::Foo2"))); +} + +std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) { + std::vector<std::string> Result; + for (const auto &Sym : Slab) + Result.push_back(SymbolToYAML(Sym)); + return Result; +} + +TEST(SerializationTest, BinaryConversions) { + // We reuse the test symbols from YAML. + auto Slab = symbolsFromYAML(std::string(YAML1) + YAML2); + ASSERT_EQ(Slab.size(), 2u); + + // Write to binary format, and parse again. 
+ IndexFileOut Out; + Out.Symbols = &Slab; + std::string Serialized = llvm::to_string(Out); + + auto In = readIndexFile(Serialized); + ASSERT_TRUE(bool(In)) << In.takeError(); + ASSERT_TRUE(In->Symbols); + + // Assert the YAML serializations match, for nice comparisons and diffs. + EXPECT_THAT(YAMLFromSymbols(*In->Symbols), + UnorderedElementsAreArray(YAMLFromSymbols(Slab))); +} + +} // namespace +} // namespace clangd +} // namespace clang |