author    Ilya Biryukov <ibiryukov@google.com>    2019-05-20 13:00:42 +0000
committer Ilya Biryukov <ibiryukov@google.com>    2019-05-20 13:00:42 +0000
commit    ddd5d5dbc8ddcf37fd3b29f6568ad42ea9f882e1 (patch)
tree      11de7b1a45d858173f7eea763ac38f068b2f08e5 /clang/unittests/Tooling/Syntax
parent    03a7353fa051356e5f59c1b30b02922b062a2deb (diff)
download  bcm5719-llvm-ddd5d5dbc8ddcf37fd3b29f6568ad42ea9f882e1.tar.gz
          bcm5719-llvm-ddd5d5dbc8ddcf37fd3b29f6568ad42ea9f882e1.zip
[Syntax] Introduce TokenBuffer, start clangToolingSyntax library
Summary:
TokenBuffer stores the list of tokens for a file obtained after
preprocessing. This is a base building block for syntax trees, see [1]
for the full proposal on syntax trees.

This commit also starts a new sub-library of ClangTooling, which will be
the home for the syntax trees and syntax-tree-based refactoring
utilities.

[1]: https://lists.llvm.org/pipermail/cfe-dev/2019-February/061414.html

Reviewers: gribozavr, sammccall

Reviewed By: sammccall

Subscribers: mgrang, riccibruno, Eugene.Zelenko, mgorny, jdoerfert, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D59887

llvm-svn: 361148
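For reference, a minimal sketch of how the new API is meant to be used, mirroring what TokensTest.cpp below exercises. PP, SM and FID stand for an already-configured Preprocessor, SourceManager and FileID; they are placeholders for this illustration, not part of the commit.

    // Collect tokens while the compiler runs, then query the resulting buffer.
    syntax::TokenCollector Collector(PP);   // attach to the preprocessor before parsing starts
    // ... run the frontend action over the file ...
    syntax::TokenBuffer Tokens = std::move(Collector).consume();

    // Final token stream after preprocessing (macros expanded, directives dropped).
    for (const syntax::Token &T : Tokens.expandedTokens())
      llvm::errs() << T.text(SM) << " ";
    // Tokens exactly as spelled in one file, before any expansion.
    llvm::ArrayRef<syntax::Token> Spelled = Tokens.spelledTokens(FID);

    // Lexer-only mode: produce spelled tokens without running a Preprocessor.
    std::vector<syntax::Token> Raw = syntax::tokenize(FID, SM, LangOptions());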
Diffstat (limited to 'clang/unittests/Tooling/Syntax')
-rw-r--r--  clang/unittests/Tooling/Syntax/CMakeLists.txt    20
-rw-r--r--  clang/unittests/Tooling/Syntax/TokensTest.cpp   654
2 files changed, 674 insertions, 0 deletions
diff --git a/clang/unittests/Tooling/Syntax/CMakeLists.txt b/clang/unittests/Tooling/Syntax/CMakeLists.txt
new file mode 100644
index 00000000000..a38479aa9b5
--- /dev/null
+++ b/clang/unittests/Tooling/Syntax/CMakeLists.txt
@@ -0,0 +1,20 @@
+set(LLVM_LINK_COMPONENTS
+ ${LLVM_TARGETS_TO_BUILD}
+ Support
+ )
+
+add_clang_unittest(TokensTest
+ TokensTest.cpp
+)
+
+target_link_libraries(TokensTest
+ PRIVATE
+ clangAST
+ clangBasic
+ clangFrontend
+ clangLex
+ clangSerialization
+ clangTooling
+ clangToolingSyntax
+ LLVMTestingSupport
+ )
diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
new file mode 100644
index 00000000000..ef3d8f36899
--- /dev/null
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -0,0 +1,654 @@
+//===- TokensTest.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/Expr.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticIDs.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemOptions.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.def"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Frontend/Utils.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Lex/Token.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Testing/Support/Annotations.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include <cassert>
+#include <cstdlib>
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include <memory>
+#include <ostream>
+#include <string>
+
+using namespace clang;
+using namespace clang::syntax;
+
+using llvm::ValueIs;
+using ::testing::AllOf;
+using ::testing::Contains;
+using ::testing::ElementsAre;
+using ::testing::Matcher;
+using ::testing::Not;
+using ::testing::StartsWith;
+
+namespace {
+// Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
+// argument.
+MATCHER_P(SameRange, A, "") {
+ return A.begin() == arg.begin() && A.end() == arg.end();
+}
+// Matchers for syntax::Token.
+MATCHER_P(Kind, K, "") { return arg.kind() == K; }
+MATCHER_P2(HasText, Text, SourceMgr, "") {
+ return arg.text(*SourceMgr) == Text;
+}
+/// Checks the start and end location of a token are equal to SourceRng.
+MATCHER_P(RangeIs, SourceRng, "") {
+ return arg.location() == SourceRng.first &&
+ arg.endLocation() == SourceRng.second;
+}
+
+class TokenCollectorTest : public ::testing::Test {
+public:
+ /// Run the clang frontend, collect the preprocessed tokens from the frontend
+ /// invocation and store them in this->Buffer.
+ /// This also clears SourceManager before running the compiler.
+ void recordTokens(llvm::StringRef Code) {
+ class RecordTokens : public ASTFrontendAction {
+ public:
+ explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
+
+ bool BeginSourceFileAction(CompilerInstance &CI) override {
+ assert(!Collector && "expected only a single call to BeginSourceFile");
+ Collector.emplace(CI.getPreprocessor());
+ return true;
+ }
+ void EndSourceFileAction() override {
+ assert(Collector && "BeginSourceFileAction was never called");
+ Result = std::move(*Collector).consume();
+ }
+
+ std::unique_ptr<ASTConsumer>
+ CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
+ return llvm::make_unique<ASTConsumer>();
+ }
+
+ private:
+ TokenBuffer &Result;
+ llvm::Optional<TokenCollector> Collector;
+ };
+
+ constexpr const char *FileName = "./input.cpp";
+ FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
+ // Prepare to run a compiler.
+ std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
+ FileName};
+ auto CI = createInvocationFromCommandLine(Args, Diags, FS);
+ assert(CI);
+ CI->getFrontendOpts().DisableFree = false;
+ CI->getPreprocessorOpts().addRemappedFile(
+ FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
+ CompilerInstance Compiler;
+ Compiler.setInvocation(std::move(CI));
+ if (!Diags->getClient())
+ Diags->setClient(new IgnoringDiagConsumer);
+ Compiler.setDiagnostics(Diags.get());
+ Compiler.setFileManager(FileMgr.get());
+ Compiler.setSourceManager(SourceMgr.get());
+
+ this->Buffer = TokenBuffer(*SourceMgr);
+ RecordTokens Recorder(this->Buffer);
+ ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
+ << "failed to run the frontend";
+ }
+
+ /// Record the tokens and return a test dump of the resulting buffer.
+ std::string collectAndDump(llvm::StringRef Code) {
+ recordTokens(Code);
+ return Buffer.dumpForTests();
+ }
+
+ // Adds a file to the test VFS.
+ void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
+ if (!FS->addFile(Path, time_t(),
+ llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
+ ADD_FAILURE() << "could not add a file to VFS: " << Path;
+ }
+ }
+
+ /// Add a new file, run syntax::tokenize() on it and return the results.
+ std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
+ // FIXME: pass proper LangOptions.
+ return syntax::tokenize(
+ SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)),
+ *SourceMgr, LangOptions());
+ }
+
+ // Specialized versions of matchers that hide the SourceManager from clients.
+ Matcher<syntax::Token> HasText(std::string Text) const {
+ return ::HasText(Text, SourceMgr.get());
+ }
+ Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
+ std::pair<SourceLocation, SourceLocation> Ls;
+ Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
+ .getLocWithOffset(R.Begin);
+ Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
+ .getLocWithOffset(R.End);
+ return ::RangeIs(Ls);
+ }
+
+ /// Finds a subrange in O(n * m).
+ template <class T, class U, class Eq>
+ llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
+ llvm::ArrayRef<T> Range, Eq F) {
+ for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
+ auto It = Begin;
+ for (auto ItSub = Subrange.begin();
+ ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
+ if (!F(*ItSub, *It))
+ goto continue_outer;
+ }
+ return llvm::makeArrayRef(Begin, It);
+ continue_outer:;
+ }
+ return llvm::makeArrayRef(Range.end(), Range.end());
+ }
+
+ /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
+ /// The match should be unique. \p Query is a whitespace-separated list of
+ /// tokens to search for.
+ llvm::ArrayRef<syntax::Token>
+ findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
+ llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
+ Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+ if (QueryTokens.empty()) {
+ ADD_FAILURE() << "will not look for an empty list of tokens";
+ std::abort();
+ }
+ // An equality test for search.
+ auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
+ return Q == T.text(*SourceMgr);
+ };
+ // Find a match.
+ auto Found =
+ findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
+ if (Found.begin() == Tokens.end()) {
+ ADD_FAILURE() << "could not find the subrange for " << Query;
+ std::abort();
+ }
+ // Check that the match is unique.
+ if (findSubrange(llvm::makeArrayRef(QueryTokens),
+ llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
+ .begin() != Tokens.end()) {
+ ADD_FAILURE() << "match is not unique for " << Query;
+ std::abort();
+ }
+ return Found;
+ };
+
+ // Specialized versions of findTokenRange for expanded and spelled tokens.
+ llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
+ return findTokenRange(Query, Buffer.expandedTokens());
+ }
+ llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
+ FileID File = FileID()) {
+ if (!File.isValid())
+ File = SourceMgr->getMainFileID();
+ return findTokenRange(Query, Buffer.spelledTokens(File));
+ }
+
+ // Data fields.
+ llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
+ new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
+ IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
+ new llvm::vfs::InMemoryFileSystem;
+ llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
+ new FileManager(FileSystemOptions(), FS);
+ llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
+ new SourceManager(*Diags, *FileMgr);
+ /// Contains last result of calling recordTokens().
+ TokenBuffer Buffer = TokenBuffer(*SourceMgr);
+};
+
+TEST_F(TokenCollectorTest, RawMode) {
+ EXPECT_THAT(tokenize("int main() {}"),
+ ElementsAre(Kind(tok::kw_int),
+ AllOf(HasText("main"), Kind(tok::identifier)),
+ Kind(tok::l_paren), Kind(tok::r_paren),
+ Kind(tok::l_brace), Kind(tok::r_brace)));
+ // Comments are ignored for now.
+ EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
+ ElementsAre(Kind(tok::kw_int),
+ AllOf(HasText("a"), Kind(tok::identifier)),
+ Kind(tok::semi)));
+}
+
+TEST_F(TokenCollectorTest, Basic) {
+ std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
+ {"int main() {}",
+ R"(expanded tokens:
+ int main ( ) { }
+file './input.cpp'
+ spelled tokens:
+ int main ( ) { }
+ no mappings.
+)"},
+ // All kinds of whitespace are ignored.
+ {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n",
+ R"(expanded tokens:
+ int main ( ) { }
+file './input.cpp'
+ spelled tokens:
+ int main ( ) { }
+ no mappings.
+)"},
+ // Annotation tokens are ignored.
+ {R"cpp(
+ #pragma GCC visibility push (public)
+ #pragma GCC visibility pop
+ )cpp",
+ R"(expanded tokens:
+ <empty>
+file './input.cpp'
+ spelled tokens:
+ # pragma GCC visibility push ( public ) # pragma GCC visibility pop
+ mappings:
+ ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
+)"}};
+ for (auto &Test : TestCases)
+ EXPECT_EQ(collectAndDump(Test.first), Test.second)
+ << collectAndDump(Test.first);
+}
+
+TEST_F(TokenCollectorTest, Locations) {
+ // Check locations of the tokens.
+ llvm::Annotations Code(R"cpp(
+ $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
+ )cpp");
+ recordTokens(Code.code());
+ // Check expanded tokens.
+ EXPECT_THAT(
+ Buffer.expandedTokens(),
+ ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
+ AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
+ AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
+ AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
+ AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
+ Kind(tok::eof)));
+ // Check spelled tokens.
+ EXPECT_THAT(
+ Buffer.spelledTokens(SourceMgr->getMainFileID()),
+ ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
+ AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
+ AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
+ AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
+ AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
+}
+
+TEST_F(TokenCollectorTest, MacroDirectives) {
+ // Macro directives are not stored anywhere at the moment.
+ std::string Code = R"cpp(
+ #define FOO a
+ #include "unresolved_file.h"
+ #undef FOO
+ #ifdef X
+ #else
+ #endif
+ #ifndef Y
+ #endif
+ #if 1
+ #elif 2
+ #else
+ #endif
+ #pragma once
+ #pragma something lalala
+
+ int a;
+ )cpp";
+ std::string Expected =
+ "expanded tokens:\n"
+ " int a ;\n"
+ "file './input.cpp'\n"
+ " spelled tokens:\n"
+ " # define FOO a # include \"unresolved_file.h\" # undef FOO "
+ "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
+ "# endif # pragma once # pragma something lalala int a ;\n"
+ " mappings:\n"
+ " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
+ EXPECT_EQ(collectAndDump(Code), Expected);
+}
+
+TEST_F(TokenCollectorTest, MacroReplacements) {
+ std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
+ // A simple object-like macro.
+ {R"cpp(
+ #define INT int const
+ INT a;
+ )cpp",
+ R"(expanded tokens:
+ int const a ;
+file './input.cpp'
+ spelled tokens:
+ # define INT int const INT a ;
+ mappings:
+ ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
+ ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
+)"},
+ // A simple function-like macro.
+ {R"cpp(
+ #define INT(a) const int
+ INT(10+10) a;
+ )cpp",
+ R"(expanded tokens:
+ const int a ;
+file './input.cpp'
+ spelled tokens:
+ # define INT ( a ) const int INT ( 10 + 10 ) a ;
+ mappings:
+ ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
+ ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
+)"},
+ // Recursive macro replacements.
+ {R"cpp(
+ #define ID(X) X
+ #define INT int const
+ ID(ID(INT)) a;
+ )cpp",
+ R"(expanded tokens:
+ int const a ;
+file './input.cpp'
+ spelled tokens:
+ # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
+ mappings:
+ ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
+ ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
+)"},
+ // A little more complicated recursive macro replacements.
+ {R"cpp(
+ #define ADD(X, Y) X+Y
+ #define MULT(X, Y) X*Y
+
+ int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
+ )cpp",
+ "expanded tokens:\n"
+ " int a = 1 * 2 + 3 * 4 + 5 ;\n"
+ "file './input.cpp'\n"
+ " spelled tokens:\n"
+ " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
+ "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
+ " mappings:\n"
+ " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
+ " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
+ // Empty macro replacement.
+ {R"cpp(
+ #define EMPTY
+ #define EMPTY_FUNC(X)
+ EMPTY
+ EMPTY_FUNC(1+2+3)
+ )cpp",
+ R"(expanded tokens:
+ <empty>
+file './input.cpp'
+ spelled tokens:
+ # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
+ mappings:
+ ['#'_0, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
+)"},
+ // File ends with a macro replacement.
+ {R"cpp(
+ #define FOO 10+10;
+ int a = FOO
+ )cpp",
+ R"(expanded tokens:
+ int a = 10 + 10 ;
+file './input.cpp'
+ spelled tokens:
+ # define FOO 10 + 10 ; int a = FOO
+ mappings:
+ ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
+ ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
+)"}};
+
+ for (auto &Test : TestCases)
+ EXPECT_EQ(Test.second, collectAndDump(Test.first))
+ << collectAndDump(Test.first);
+}
+
+TEST_F(TokenCollectorTest, SpecialTokens) {
+ // Tokens coming from concatenations.
+ recordTokens(R"cpp(
+ #define CONCAT(a, b) a ## b
+ int a = CONCAT(1, 2);
+ )cpp");
+ EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
+ Contains(HasText("12")));
+ // Multi-line tokens with slashes at the end.
+ recordTokens("i\\\nn\\\nt");
+ EXPECT_THAT(Buffer.expandedTokens(),
+ ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
+ Kind(tok::eof)));
+ // FIXME: test tokens with digraphs and UCN identifiers.
+}
+
+TEST_F(TokenCollectorTest, LateBoundTokens) {
+ // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
+ // but we choose to record them as a single token (for now).
+ llvm::Annotations Code(R"cpp(
+ template <class T>
+ struct foo { int a; };
+ int bar = foo<foo<int$br[[>>]]().a;
+ int baz = 10 $op[[>>]] 2;
+ )cpp");
+ recordTokens(Code.code());
+ EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
+ AllOf(Contains(AllOf(Kind(tok::greatergreater),
+ RangeIs(Code.range("br")))),
+ Contains(AllOf(Kind(tok::greatergreater),
+ RangeIs(Code.range("op"))))));
+}
+
+TEST_F(TokenCollectorTest, DelayedParsing) {
+ llvm::StringLiteral Code = R"cpp(
+ struct Foo {
+ int method() {
+ // Parser will visit method bodies and initializers multiple times, but
+ // TokenBuffer should only record the first walk over the tokens;
+ return 100;
+ }
+ int a = 10;
+
+ struct Subclass {
+ void foo() {
+ Foo().method();
+ }
+ };
+ };
+ )cpp";
+ std::string ExpectedTokens =
+ "expanded tokens:\n"
+ " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
+ "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
+ EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
+}
+
+TEST_F(TokenCollectorTest, MultiFile) {
+ addFile("./foo.h", R"cpp(
+ #define ADD(X, Y) X+Y
+ int a = 100;
+ #include "bar.h"
+ )cpp");
+ addFile("./bar.h", R"cpp(
+ int b = ADD(1, 2);
+ #define MULT(X, Y) X*Y
+ )cpp");
+ llvm::StringLiteral Code = R"cpp(
+ #include "foo.h"
+ int c = ADD(1, MULT(2,3));
+ )cpp";
+
+ std::string Expected = R"(expanded tokens:
+ int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
+file './input.cpp'
+ spelled tokens:
+ # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
+ mappings:
+ ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
+ ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
+file './foo.h'
+ spelled tokens:
+ # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
+ mappings:
+ ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
+ ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
+file './bar.h'
+ spelled tokens:
+ int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
+ mappings:
+ ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
+ ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
+)";
+
+ EXPECT_EQ(Expected, collectAndDump(Code))
+ << "input: " << Code << "\nresults: " << collectAndDump(Code);
+}
+
+class TokenBufferTest : public TokenCollectorTest {};
+
+TEST_F(TokenBufferTest, SpelledByExpanded) {
+ recordTokens(R"cpp(
+ a1 a2 a3 b1 b2
+ )cpp");
+
+ // Sanity check: expanded and spelled tokens are stored separately.
+ EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
+ // Searching for subranges of expanded tokens should give the corresponding
+ // spelled ones.
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
+ ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
+ ValueIs(SameRange(findSpelled("a1 a2 a3"))));
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
+ ValueIs(SameRange(findSpelled("b1 b2"))));
+
+ // Test search on simple macro expansions.
+ recordTokens(R"cpp(
+ #define A a1 a2 a3
+ #define B b1 b2
+
+ A split B
+ )cpp");
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
+ ValueIs(SameRange(findSpelled("A split B"))));
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
+ ValueIs(SameRange(findSpelled("A split").drop_back())));
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
+ ValueIs(SameRange(findSpelled("split B").drop_front())));
+ // Ranges not fully covering macro invocations should fail.
+ EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
+ EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
+ EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
+ llvm::None);
+
+ // Recursive macro invocations.
+ recordTokens(R"cpp(
+ #define ID(x) x
+ #define B b1 b2
+
+ ID(ID(ID(a1) a2 a3)) split ID(B)
+ )cpp");
+
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
+ ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
+ ValueIs(SameRange(findSpelled("ID ( B )"))));
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
+ ValueIs(SameRange(findSpelled(
+ "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
+ // Ranges crossing macro call boundaries.
+ EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")),
+ llvm::None);
+ EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")),
+ llvm::None);
+ // FIXME: next two examples should map to macro arguments, but currently they
+ // fail.
+ EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None);
+ EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
+
+ // Empty macro expansions.
+ recordTokens(R"cpp(
+ #define EMPTY
+ #define ID(X) X
+
+ EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
+ EMPTY EMPTY ID(4 5 6) split2
+ ID(7 8 9) EMPTY EMPTY
+ )cpp");
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
+ ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
+ ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
+ ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));
+
+ // Empty mappings coming from various directives.
+ recordTokens(R"cpp(
+ #define ID(X) X
+ ID(1)
+ #pragma lalala
+ not_mapped
+ )cpp");
+ EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
+ ValueIs(SameRange(findSpelled("not_mapped"))));
+}
+
+TEST_F(TokenBufferTest, TokensToFileRange) {
+ addFile("./foo.h", "token_from_header");
+ llvm::Annotations Code(R"cpp(
+ #define FOO token_from_expansion
+ #include "./foo.h"
+ $all[[$i[[int]] a = FOO;]]
+ )cpp");
+ recordTokens(Code.code());
+
+ auto &SM = *SourceMgr;
+
+ // Two simple examples.
+ auto Int = findExpanded("int").front();
+ auto Semi = findExpanded(";").front();
+ EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
+ Code.range("i").End));
+ EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
+ FileRange(SM.getMainFileID(), Code.range("all").Begin,
+ Code.range("all").End));
+ // We don't test assertion failures because death tests are slow.
+}
+
+} // namespace
\ No newline at end of file