diff options
author | Dmitri Gribenko <gribozavr@gmail.com> | 2012-07-06 00:28:32 +0000 |
---|---|---|
committer | Dmitri Gribenko <gribozavr@gmail.com> | 2012-07-06 00:28:32 +0000 |
commit | ec92531c292e9c6961dd3e545c9fb031135b19bc (patch) | |
tree | 2c344abf5dbc2547b3b0159880de0196461572e2 /clang/lib/AST/CommentLexer.cpp | |
parent | d5200f1bc4cb41329fd05a09d552e63705b294fc (diff) | |
download | bcm5719-llvm-ec92531c292e9c6961dd3e545c9fb031135b19bc.tar.gz bcm5719-llvm-ec92531c292e9c6961dd3e545c9fb031135b19bc.zip |
Implement AST classes for comments, a real parser for Doxygen comments and a
very simple semantic analysis that just builds the AST; minor changes for lexer
to pick up source locations I didn't think about before.
Comments AST is modelled along the ideas of HTML AST: block and inline content.
* Block content is a paragraph or a command that has a paragraph as an argument
or verbatim command.
* Inline content is placed within some block. Inline content includes plain
text, inline commands and HTML as tag soup.
llvm-svn: 159790
Diffstat (limited to 'clang/lib/AST/CommentLexer.cpp')
-rw-r--r-- | clang/lib/AST/CommentLexer.cpp | 44 |
1 files changed, 36 insertions, 8 deletions
diff --git a/clang/lib/AST/CommentLexer.cpp b/clang/lib/AST/CommentLexer.cpp index c3a801d924c..77d2a9b72dd 100644 --- a/clang/lib/AST/CommentLexer.cpp +++ b/clang/lib/AST/CommentLexer.cpp @@ -122,6 +122,7 @@ void Lexer::skipLineStartingDecorations() { } namespace { +/// Returns pointer to the first newline character in the string. const char *findNewline(const char *BufferPtr, const char *BufferEnd) { for ( ; BufferPtr != BufferEnd; ++BufferPtr) { const char C = *BufferPtr; @@ -270,6 +271,9 @@ void Lexer::lexCommentText(Token &T) { case LS_HTMLOpenTag: lexHTMLOpenTag(T); return; + case LS_HTMLCloseTag: + lexHTMLCloseTag(T); + return; } assert(State == LS_Normal); @@ -356,7 +360,7 @@ void Lexer::lexCommentText(Token &T) { if (isHTMLIdentifierCharacter(C)) setupAndLexHTMLOpenTag(T); else if (C == '/') - lexHTMLCloseTag(T); + setupAndLexHTMLCloseTag(T); else { StringRef Text(BufferPtr, TokenPtr - BufferPtr); formTokenWithChars(T, TokenPtr, tok::text); @@ -404,6 +408,18 @@ void Lexer::setupAndLexVerbatimBlock(Token &T, formTokenWithChars(T, TextBegin, tok::verbatim_block_begin); T.setVerbatimBlockName(Name); + // If there is a newline following the verbatim opening command, skip the + // newline so that we don't create an tok::verbatim_block_line with empty + // text content. + if (BufferPtr != CommentEnd) { + const char C = *BufferPtr; + if (C == '\n' || C == '\r') { + BufferPtr = skipNewline(BufferPtr, CommentEnd); + State = LS_VerbatimBlockBody; + return; + } + } + State = LS_VerbatimBlockFirstLine; } @@ -419,9 +435,11 @@ void Lexer::lexVerbatimBlockFirstLine(Token &T) { // Look for end command in current line. size_t Pos = Line.find(VerbatimBlockEndCommandName); + const char *TextEnd; const char *NextLine; if (Pos == StringRef::npos) { // Current line is completely verbatim. + TextEnd = Newline; NextLine = skipNewline(Newline, CommentEnd); } else if (Pos == 0) { // Current line contains just an end command. @@ -433,10 +451,11 @@ void Lexer::lexVerbatimBlockFirstLine(Token &T) { return; } else { // There is some text, followed by end command. Extract text first. - NextLine = BufferPtr + Pos; + TextEnd = BufferPtr + Pos; + NextLine = TextEnd; } - StringRef Text(BufferPtr, NextLine - BufferPtr); + StringRef Text(BufferPtr, TextEnd - BufferPtr); formTokenWithChars(T, NextLine, tok::verbatim_block_line); T.setVerbatimBlockText(Text); @@ -542,18 +561,26 @@ void Lexer::lexHTMLOpenTag(Token &T) { } } -void Lexer::lexHTMLCloseTag(Token &T) { +void Lexer::setupAndLexHTMLCloseTag(Token &T) { assert(BufferPtr[0] == '<' && BufferPtr[1] == '/'); const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd); const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd); const char *End = skipWhitespace(TagNameEnd, CommentEnd); - if (End != CommentEnd && *End == '>') - End++; formTokenWithChars(T, End, tok::html_tag_close); T.setHTMLTagCloseName(StringRef(TagNameBegin, TagNameEnd - TagNameBegin)); + + if (BufferPtr != CommentEnd && *BufferPtr == '>') + State = LS_HTMLCloseTag; +} + +void Lexer::lexHTMLCloseTag(Token &T) { + assert(BufferPtr != CommentEnd && *BufferPtr == '>'); + + formTokenWithChars(T, BufferPtr + 1, tok::html_greater); + State = LS_Normal; } Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts, @@ -595,7 +622,8 @@ again: BufferPtr++; CommentState = LCS_InsideBCPLComment; - State = LS_Normal; + if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine) + State = LS_Normal; CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd); goto again; } @@ -628,7 +656,7 @@ again: EndWhitespace++; // Turn any whitespace between comments (and there is only whitespace - // between them) into a newline. We have two newlines between comments + // between them) into a newline. We have two newlines between C comments // in total (first one was synthesized after a comment). formTokenWithChars(T, EndWhitespace, tok::newline); |