Implement AST classes for comments, a real parser for Doxygen comments and a

very simple semantic analysis that just builds the AST; minor changes for lexer to pick up source locations I didn't think about before. Comments AST is modelled along the ideas of HTML AST: block and inline content. * Block content is a paragraph or a command that has a paragraph as an argument or verbatim command. * Inline content is placed within some block. Inline content includes plain text, inline commands and HTML as tag soup. llvm-svn: 159790
author: Dmitri Gribenko <gribozavr@gmail.com> 2012-07-06 00:28:32 +0000
committer: Dmitri Gribenko <gribozavr@gmail.com> 2012-07-06 00:28:32 +0000
commit: ec92531c292e9c6961dd3e545c9fb031135b19bc (patch)
tree: 2c344abf5dbc2547b3b0159880de0196461572e2 /clang/lib/AST/CommentLexer.cpp
parent: d5200f1bc4cb41329fd05a09d552e63705b294fc (diff)
download: bcm5719-llvm-ec92531c292e9c6961dd3e545c9fb031135b19bc.tar.gz
bcm5719-llvm-ec92531c292e9c6961dd3e545c9fb031135b19bc.zip
1 files changed, 36 insertions, 8 deletions
diff --git a/clang/lib/AST/CommentLexer.cpp b/clang/lib/AST/CommentLexer.cpp
index c3a801d924c..77d2a9b72dd 100644
--- a/clang/lib/AST/CommentLexer.cpp
+++ b/clang/lib/AST/CommentLexer.cpp
@@ -122,6 +122,7 @@ void Lexer::skipLineStartingDecorations() {
 }
 
 namespace {
+/// Returns pointer to the first newline character in the string.
 const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
     const char C = *BufferPtr;
@@ -270,6 +271,9 @@ void Lexer::lexCommentText(Token &T) {
   case LS_HTMLOpenTag:
     lexHTMLOpenTag(T);
     return;
+  case LS_HTMLCloseTag:
+    lexHTMLCloseTag(T);
+    return;
   }
 
   assert(State == LS_Normal);
@@ -356,7 +360,7 @@ void Lexer::lexCommentText(Token &T) {
         if (isHTMLIdentifierCharacter(C))
           setupAndLexHTMLOpenTag(T);
         else if (C == '/')
-          lexHTMLCloseTag(T);
+          setupAndLexHTMLCloseTag(T);
         else {
           StringRef Text(BufferPtr, TokenPtr - BufferPtr);
           formTokenWithChars(T, TokenPtr, tok::text);
@@ -404,6 +408,18 @@ void Lexer::setupAndLexVerbatimBlock(Token &T,
   formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
   T.setVerbatimBlockName(Name);
 
+  // If there is a newline following the verbatim opening command, skip the
+  // newline so that we don't create an tok::verbatim_block_line with empty
+  // text content.
+  if (BufferPtr != CommentEnd) {
+    const char C = *BufferPtr;
+    if (C == '\n' || C == '\r') {
+      BufferPtr = skipNewline(BufferPtr, CommentEnd);
+      State = LS_VerbatimBlockBody;
+      return;
+    }
+  }
+
   State = LS_VerbatimBlockFirstLine;
 }
 
@@ -419,9 +435,11 @@ void Lexer::lexVerbatimBlockFirstLine(Token &T) {
 
   // Look for end command in current line.
   size_t Pos = Line.find(VerbatimBlockEndCommandName);
+  const char *TextEnd;
   const char *NextLine;
   if (Pos == StringRef::npos) {
     // Current line is completely verbatim.
+    TextEnd = Newline;
     NextLine = skipNewline(Newline, CommentEnd);
   } else if (Pos == 0) {
     // Current line contains just an end command.
@@ -433,10 +451,11 @@ void Lexer::lexVerbatimBlockFirstLine(Token &T) {
     return;
   } else {
     // There is some text, followed by end command.  Extract text first.
-    NextLine = BufferPtr + Pos;
+    TextEnd = BufferPtr + Pos;
+    NextLine = TextEnd;
   }
 
-  StringRef Text(BufferPtr, NextLine - BufferPtr);
+  StringRef Text(BufferPtr, TextEnd - BufferPtr);
   formTokenWithChars(T, NextLine, tok::verbatim_block_line);
   T.setVerbatimBlockText(Text);
 
@@ -542,18 +561,26 @@ void Lexer::lexHTMLOpenTag(Token &T) {
   }
 }
 
-void Lexer::lexHTMLCloseTag(Token &T) {
+void Lexer::setupAndLexHTMLCloseTag(Token &T) {
   assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
 
   const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
   const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
 
   const char *End = skipWhitespace(TagNameEnd, CommentEnd);
-  if (End != CommentEnd && *End == '>')
-    End++;
 
   formTokenWithChars(T, End, tok::html_tag_close);
   T.setHTMLTagCloseName(StringRef(TagNameBegin, TagNameEnd - TagNameBegin));
+
+  if (BufferPtr != CommentEnd && *BufferPtr == '>')
+    State = LS_HTMLCloseTag;
+}
+
+void Lexer::lexHTMLCloseTag(Token &T) {
+  assert(BufferPtr != CommentEnd && *BufferPtr == '>');
+
+  formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
+  State = LS_Normal;
 }
 
 Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
@@ -595,7 +622,8 @@ again:
         BufferPtr++;
 
       CommentState = LCS_InsideBCPLComment;
-      State = LS_Normal;
+      if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
+        State = LS_Normal;
       CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
       goto again;
     }
@@ -628,7 +656,7 @@ again:
       EndWhitespace++;
 
     // Turn any whitespace between comments (and there is only whitespace
-    // between them) into a newline.  We have two newlines between comments
+    // between them) into a newline.  We have two newlines between C comments
     // in total (first one was synthesized after a comment).
     formTokenWithChars(T, EndWhitespace, tok::newline);
author	Dmitri Gribenko <gribozavr@gmail.com>	2012-07-06 00:28:32 +0000
committer	Dmitri Gribenko <gribozavr@gmail.com>	2012-07-06 00:28:32 +0000
commit	ec92531c292e9c6961dd3e545c9fb031135b19bc (patch)
tree	2c344abf5dbc2547b3b0159880de0196461572e2 /clang/lib/AST/CommentLexer.cpp
parent	d5200f1bc4cb41329fd05a09d552e63705b294fc (diff)
download	bcm5719-llvm-ec92531c292e9c6961dd3e545c9fb031135b19bc.tar.gz bcm5719-llvm-ec92531c292e9c6961dd3e545c9fb031135b19bc.zip