diff options
author | Ilya Biryukov <ibiryukov@google.com> | 2018-05-16 12:30:09 +0000 |
---|---|---|
committer | Ilya Biryukov <ibiryukov@google.com> | 2018-05-16 12:30:09 +0000 |
commit | 1ff7c32fc91c607b690d4bb9cf42f406be8dde68 (patch) | |
tree | d0381185f45ca8a75734da3f283df9e9432ed125 /clang/lib/AST/CommentLexer.cpp | |
parent | a3f955bddb9102eb9813103f76fa76f1db9d1d9d (diff) | |
download | bcm5719-llvm-1ff7c32fc91c607b690d4bb9cf42f406be8dde68.tar.gz bcm5719-llvm-1ff7c32fc91c607b690d4bb9cf42f406be8dde68.zip |
[AST] Added a helper to extract a user-friendly text of a comment.
Summary:
The helper is used in clangd for documentation shown in code completion
and storing the docs in the symbols. See D45999.
This patch reuses the code of the Doxygen comment lexer, disabling the
bits that do command and html tag parsing.
The new helper works on all comments, including non-doxygen comments.
However, it does not understand or transform any doxygen directives,
i.e. cannot extract brief text, etc.
Reviewers: sammccall, hokein, ioeric
Reviewed By: ioeric
Subscribers: mgorny, cfe-commits
Differential Revision: https://reviews.llvm.org/D46000
llvm-svn: 332458
Diffstat (limited to 'clang/lib/AST/CommentLexer.cpp')
-rw-r--r-- | clang/lib/AST/CommentLexer.cpp | 246 |
1 files changed, 129 insertions, 117 deletions
diff --git a/clang/lib/AST/CommentLexer.cpp b/clang/lib/AST/CommentLexer.cpp index 65d0f56f09a..6ff4d45a957 100644 --- a/clang/lib/AST/CommentLexer.cpp +++ b/clang/lib/AST/CommentLexer.cpp @@ -294,6 +294,39 @@ void Lexer::lexCommentText(Token &T) { assert(CommentState == LCS_InsideBCPLComment || CommentState == LCS_InsideCComment); + // Handles lexing non-command text, i.e. text and newline. + auto HandleNonCommandToken = [&]() -> void { + assert(State == LS_Normal); + + const char *TokenPtr = BufferPtr; + assert(TokenPtr < CommentEnd); + switch (*TokenPtr) { + case '\n': + case '\r': + TokenPtr = skipNewline(TokenPtr, CommentEnd); + formTokenWithChars(T, TokenPtr, tok::newline); + + if (CommentState == LCS_InsideCComment) + skipLineStartingDecorations(); + return; + + default: { + StringRef TokStartSymbols = ParseCommands ? "\n\r\\@&<" : "\n\r"; + size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr) + .find_first_of(TokStartSymbols); + if (End != StringRef::npos) + TokenPtr += End; + else + TokenPtr = CommentEnd; + formTextToken(T, TokenPtr); + return; + } + } + }; + + if (!ParseCommands) + return HandleNonCommandToken(); + switch (State) { case LS_Normal: break; @@ -315,136 +348,116 @@ void Lexer::lexCommentText(Token &T) { } assert(State == LS_Normal); - const char *TokenPtr = BufferPtr; assert(TokenPtr < CommentEnd); - while (TokenPtr != CommentEnd) { - switch(*TokenPtr) { - case '\\': - case '@': { - // Commands that start with a backslash and commands that start with - // 'at' have equivalent semantics. But we keep information about the - // exact syntax in AST for comments. - tok::TokenKind CommandKind = - (*TokenPtr == '@') ? tok::at_command : tok::backslash_command; + switch(*TokenPtr) { + case '\\': + case '@': { + // Commands that start with a backslash and commands that start with + // 'at' have equivalent semantics. But we keep information about the + // exact syntax in AST for comments. + tok::TokenKind CommandKind = + (*TokenPtr == '@') ? tok::at_command : tok::backslash_command; + TokenPtr++; + if (TokenPtr == CommentEnd) { + formTextToken(T, TokenPtr); + return; + } + char C = *TokenPtr; + switch (C) { + default: + break; + + case '\\': case '@': case '&': case '$': + case '#': case '<': case '>': case '%': + case '\"': case '.': case ':': + // This is one of \\ \@ \& \$ etc escape sequences. TokenPtr++; - if (TokenPtr == CommentEnd) { - formTextToken(T, TokenPtr); - return; - } - char C = *TokenPtr; - switch (C) { - default: - break; - - case '\\': case '@': case '&': case '$': - case '#': case '<': case '>': case '%': - case '\"': case '.': case ':': - // This is one of \\ \@ \& \$ etc escape sequences. + if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') { + // This is the \:: escape sequence. TokenPtr++; - if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') { - // This is the \:: escape sequence. - TokenPtr++; - } - StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1)); - formTokenWithChars(T, TokenPtr, tok::text); - T.setText(UnescapedText); - return; } + StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1)); + formTokenWithChars(T, TokenPtr, tok::text); + T.setText(UnescapedText); + return; + } - // Don't make zero-length commands. - if (!isCommandNameStartCharacter(*TokenPtr)) { - formTextToken(T, TokenPtr); - return; - } + // Don't make zero-length commands. + if (!isCommandNameStartCharacter(*TokenPtr)) { + formTextToken(T, TokenPtr); + return; + } - TokenPtr = skipCommandName(TokenPtr, CommentEnd); - unsigned Length = TokenPtr - (BufferPtr + 1); - - // Hardcoded support for lexing LaTeX formula commands - // \f$ \f[ \f] \f{ \f} as a single command. - if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) { - C = *TokenPtr; - if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') { - TokenPtr++; - Length++; - } - } + TokenPtr = skipCommandName(TokenPtr, CommentEnd); + unsigned Length = TokenPtr - (BufferPtr + 1); - StringRef CommandName(BufferPtr + 1, Length); - - const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName); - if (!Info) { - if ((Info = Traits.getTypoCorrectCommandInfo(CommandName))) { - StringRef CorrectedName = Info->Name; - SourceLocation Loc = getSourceLocation(BufferPtr); - SourceLocation EndLoc = getSourceLocation(TokenPtr); - SourceRange FullRange = SourceRange(Loc, EndLoc); - SourceRange CommandRange(Loc.getLocWithOffset(1), EndLoc); - Diag(Loc, diag::warn_correct_comment_command_name) - << FullRange << CommandName << CorrectedName - << FixItHint::CreateReplacement(CommandRange, CorrectedName); - } else { - formTokenWithChars(T, TokenPtr, tok::unknown_command); - T.setUnknownCommandName(CommandName); - Diag(T.getLocation(), diag::warn_unknown_comment_command_name) - << SourceRange(T.getLocation(), T.getEndLocation()); - return; - } - } - if (Info->IsVerbatimBlockCommand) { - setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info); - return; - } - if (Info->IsVerbatimLineCommand) { - setupAndLexVerbatimLine(T, TokenPtr, Info); - return; + // Hardcoded support for lexing LaTeX formula commands + // \f$ \f[ \f] \f{ \f} as a single command. + if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) { + C = *TokenPtr; + if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') { + TokenPtr++; + Length++; } - formTokenWithChars(T, TokenPtr, CommandKind); - T.setCommandID(Info->getID()); - return; } - case '&': - lexHTMLCharacterReference(T); - return; - - case '<': { - TokenPtr++; - if (TokenPtr == CommentEnd) { - formTextToken(T, TokenPtr); + StringRef CommandName(BufferPtr + 1, Length); + + const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName); + if (!Info) { + if ((Info = Traits.getTypoCorrectCommandInfo(CommandName))) { + StringRef CorrectedName = Info->Name; + SourceLocation Loc = getSourceLocation(BufferPtr); + SourceLocation EndLoc = getSourceLocation(TokenPtr); + SourceRange FullRange = SourceRange(Loc, EndLoc); + SourceRange CommandRange(Loc.getLocWithOffset(1), EndLoc); + Diag(Loc, diag::warn_correct_comment_command_name) + << FullRange << CommandName << CorrectedName + << FixItHint::CreateReplacement(CommandRange, CorrectedName); + } else { + formTokenWithChars(T, TokenPtr, tok::unknown_command); + T.setUnknownCommandName(CommandName); + Diag(T.getLocation(), diag::warn_unknown_comment_command_name) + << SourceRange(T.getLocation(), T.getEndLocation()); return; } - const char C = *TokenPtr; - if (isHTMLIdentifierStartingCharacter(C)) - setupAndLexHTMLStartTag(T); - else if (C == '/') - setupAndLexHTMLEndTag(T); - else - formTextToken(T, TokenPtr); + } + if (Info->IsVerbatimBlockCommand) { + setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info); return; } - - case '\n': - case '\r': - TokenPtr = skipNewline(TokenPtr, CommentEnd); - formTokenWithChars(T, TokenPtr, tok::newline); - - if (CommentState == LCS_InsideCComment) - skipLineStartingDecorations(); + if (Info->IsVerbatimLineCommand) { + setupAndLexVerbatimLine(T, TokenPtr, Info); return; + } + formTokenWithChars(T, TokenPtr, CommandKind); + T.setCommandID(Info->getID()); + return; + } - default: { - size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr). - find_first_of("\n\r\\@&<"); - if (End != StringRef::npos) - TokenPtr += End; - else - TokenPtr = CommentEnd; + case '&': + lexHTMLCharacterReference(T); + return; + + case '<': { + TokenPtr++; + if (TokenPtr == CommentEnd) { formTextToken(T, TokenPtr); return; } + const char C = *TokenPtr; + if (isHTMLIdentifierStartingCharacter(C)) + setupAndLexHTMLStartTag(T); + else if (C == '/') + setupAndLexHTMLEndTag(T); + else + formTextToken(T, TokenPtr); + return; } + + default: + return HandleNonCommandToken(); } } @@ -727,14 +740,13 @@ void Lexer::lexHTMLEndTag(Token &T) { } Lexer::Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags, - const CommandTraits &Traits, - SourceLocation FileLoc, - const char *BufferStart, const char *BufferEnd): - Allocator(Allocator), Diags(Diags), Traits(Traits), - BufferStart(BufferStart), BufferEnd(BufferEnd), - FileLoc(FileLoc), BufferPtr(BufferStart), - CommentState(LCS_BeforeComment), State(LS_Normal) { -} + const CommandTraits &Traits, SourceLocation FileLoc, + const char *BufferStart, const char *BufferEnd, + bool ParseCommands) + : Allocator(Allocator), Diags(Diags), Traits(Traits), + BufferStart(BufferStart), BufferEnd(BufferEnd), FileLoc(FileLoc), + BufferPtr(BufferStart), CommentState(LCS_BeforeComment), State(LS_Normal), + ParseCommands(ParseCommands) {} void Lexer::lex(Token &T) { again: |