diff options
| author | Chris Lattner <sabre@nondot.org> | 2006-07-29 06:30:25 +0000 |
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2006-07-29 06:30:25 +0000 |
| commit | 457fc15bc5beb48fb3041a7525b6731edf720cc8 (patch) | |
| tree | 878e141457ea9f7f4b03d3f6d3c8a707dfaa6cd1 /clang | |
| parent | 2be41154652c162e5ab5db6ea41a651c4dc3263f (diff) | |
| download | bcm5719-llvm-457fc15bc5beb48fb3041a7525b6731edf720cc8.tar.gz bcm5719-llvm-457fc15bc5beb48fb3041a7525b6731edf720cc8.zip | |
Implement comment saving mode: the -C and -CC options.
llvm-svn: 38783
Diffstat (limited to 'clang')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | clang/Driver/PrintPreprocessedOutput.cpp | 12 |
| -rw-r--r-- | clang/Driver/clang.cpp | 2 |
| -rw-r--r-- | clang/Driver/clang.h | 5 |
| -rw-r--r-- | clang/Lex/Lexer.cpp | 90 |
| -rw-r--r-- | clang/Lex/Preprocessor.cpp | 18 |
| -rw-r--r-- | clang/README.txt | 1 |
| -rw-r--r-- | clang/include/clang/Basic/TokenKinds.def | 3 |
| -rw-r--r-- | clang/include/clang/Lex/Lexer.h | 40 |
8 files changed, 126 insertions, 45 deletions
diff --git a/clang/Driver/PrintPreprocessedOutput.cpp b/clang/Driver/PrintPreprocessedOutput.cpp index dff4411ef2a..e1fb1aa42a7 100644 --- a/clang/Driver/PrintPreprocessedOutput.cpp +++ b/clang/Driver/PrintPreprocessedOutput.cpp @@ -99,6 +99,11 @@ static void OutputString(const char *Ptr, unsigned Size) { static cl::opt<bool> DisableLineMarkers("P", cl::desc("Disable linemarker output in -E mode")); +static cl::opt<bool> +EnableCommentOutput("C", cl::desc("Enable comment output in -E mode")); +static cl::opt<bool> +EnableMacroCommentOutput("CC", cl::desc("Enable comment output in -E mode, " + "even from macro expansions")); static unsigned EModeCurLine; static std::string EModeCurFilename; @@ -357,7 +362,12 @@ static bool AvoidConcat(const LexerToken &PrevTok, const LexerToken &Tok, /// DoPrintPreprocessedInput - This implements -E mode. /// -void clang::DoPrintPreprocessedInput(Preprocessor &PP) { +void clang::DoPrintPreprocessedInput(Preprocessor &PP, LangOptions &Options) { + if (EnableCommentOutput) // -C specified? + Options.KeepComments = 1; + if (EnableMacroCommentOutput) // -CC specified? + Options.KeepComments = Options.KeepMacroComments = 1; + InitOutputBuffer(); LexerToken Tok, PrevTok; diff --git a/clang/Driver/clang.cpp b/clang/Driver/clang.cpp index 8dfbe59f50d..f2464992b3f 100644 --- a/clang/Driver/clang.cpp +++ b/clang/Driver/clang.cpp @@ -703,7 +703,7 @@ int main(int argc, char **argv) { } case PrintPreprocessedInput: // -E mode. - DoPrintPreprocessedInput(PP); + DoPrintPreprocessedInput(PP, Options); break; case DumpTokens: { // Token dump mode. diff --git a/clang/Driver/clang.h b/clang/Driver/clang.h index 98cdb7f3692..3050d6aff14 100644 --- a/clang/Driver/clang.h +++ b/clang/Driver/clang.h @@ -16,10 +16,11 @@ namespace llvm { namespace clang { -class Preprocessor; +class Preprocessor; +class LangOptions; /// DoPrintPreprocessedInput - Implement -E mode. 
-void DoPrintPreprocessedInput(Preprocessor &PP); +void DoPrintPreprocessedInput(Preprocessor &PP, LangOptions &Options); } // end namespace clang } // end namespace llvm diff --git a/clang/Lex/Lexer.cpp b/clang/Lex/Lexer.cpp index 417d1d8d376..c686d918262 100644 --- a/clang/Lex/Lexer.cpp +++ b/clang/Lex/Lexer.cpp @@ -65,6 +65,9 @@ Lexer::Lexer(const SourceBuffer *File, unsigned fileid, Preprocessor &pp, // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block // or otherwise skipping over tokens. LexingRawMode = false; + + // Default to keeping comments if requested. + KeepCommentMode = Features.KeepComments; } /// Stringify - Convert the specified string into a C string, with surrounding @@ -587,13 +590,15 @@ void Lexer::SkipWhitespace(LexerToken &Result, const char *CurPtr) { // If the next token is obviously a // or /* */ comment, skip it efficiently // too (without going through the big switch stmt). - if (Char == '/' && CurPtr[1] == '/') { + if (Char == '/' && CurPtr[1] == '/' && !KeepCommentMode) { BufferPtr = CurPtr; - return SkipBCPLComment(Result, CurPtr+1); + SkipBCPLComment(Result, CurPtr+1); + return; } - if (Char == '/' && CurPtr[1] == '*') { + if (Char == '/' && CurPtr[1] == '*' && !KeepCommentMode) { BufferPtr = CurPtr; - return SkipBlockComment(Result, CurPtr+2); + SkipBlockComment(Result, CurPtr+2); + return; } BufferPtr = CurPtr; } @@ -601,7 +606,7 @@ void Lexer::SkipWhitespace(LexerToken &Result, const char *CurPtr) { // SkipBCPLComment - We have just read the // characters from input. Skip until // we find the newline character thats terminate the comment. Then update /// BufferPtr and return. -void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) { +bool Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) { // If BCPL comments aren't explicitly enabled for this language, emit an // extension warning. 
if (!Features.BCPLComment) { @@ -648,16 +653,20 @@ void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) { } } - if (CurPtr == BufferEnd+1) goto FoundEOF; + if (CurPtr == BufferEnd+1) { --CurPtr; break; } } while (C != '\n' && C != '\r'); - // Found and did not consume a newline. + // Found but did not consume the newline. + + // If we are returning comments as tokens, return this comment as a token. + if (KeepCommentMode) + return SaveBCPLComment(Result, CurPtr); // If we are inside a preprocessor directive and we see the end of line, // return immediately, so that the lexer can return this as an EOM token. - if (ParsingPreprocessorDirective) { + if (ParsingPreprocessorDirective || CurPtr == BufferEnd) { BufferPtr = CurPtr; - return; + return true; } // Otherwise, eat the \n character. We don't care if this is a \n\r or @@ -674,15 +683,33 @@ void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) { // big switch, handle it efficiently now. if (isWhitespace(*CurPtr)) { Result.SetFlag(LexerToken::LeadingSpace); - return SkipWhitespace(Result, CurPtr+1); + SkipWhitespace(Result, CurPtr+1); + return true; } BufferPtr = CurPtr; - return; + return true; +} -FoundEOF: // If we ran off the end of the buffer, return EOF. - BufferPtr = CurPtr-1; - return; +/// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in +/// an appropriate way and return it. +bool Lexer::SaveBCPLComment(LexerToken &Result, const char *CurPtr) { + Result.SetKind(tok::comment); + FormTokenWithChars(Result, CurPtr); + + // If this BCPL-style comment is in a macro definition, transmogrify it into + // a C-style block comment. + if (ParsingPreprocessorDirective) { + std::string Spelling = PP.getSpelling(Result); + assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?"); + Spelling[1] = '*'; // Change prefix to "/*". + Spelling += "*/"; // add suffix. 
+ + Result.SetLocation(PP.CreateString(&Spelling[0], Spelling.size(), + Result.getLocation())); + Result.SetLength(Spelling.size()); + } + return false; } /// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline @@ -748,7 +775,7 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, /// because they cannot cause the comment to end. The only thing that can /// happen is the comment could end with an escaped newline between the */ end /// of comment. -void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) { +bool Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) { // Scan one character past where we should, looking for a '/' character. Once // we find it, check to see if it was preceeded by a *. This common // optimization helps people who like to put a lot of * characters in their @@ -757,7 +784,7 @@ void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) { if (C == 0 && CurPtr == BufferEnd+1) { Diag(BufferPtr, diag::err_unterminated_block_comment); BufferPtr = CurPtr-1; - return; + return true; } while (1) { @@ -789,22 +816,31 @@ void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) { // after the /*, but this would involve lexing a lot of what really is the // comment, which surely would confuse the parser. BufferPtr = CurPtr-1; - return; + return true; } C = *CurPtr++; } + + // If we are returning comments as tokens, return this comment as a token. + if (KeepCommentMode) { + Result.SetKind(tok::comment); + FormTokenWithChars(Result, CurPtr); + return false; + } // It is common for the tokens immediately after a /**/ comment to be // whitespace. Instead of going through the big switch, handle it // efficiently now. if (isHorizontalWhitespace(*CurPtr)) { Result.SetFlag(LexerToken::LeadingSpace); - return SkipWhitespace(Result, CurPtr+1); + SkipWhitespace(Result, CurPtr+1); + return true; } // Otherwise, just return so that the next character will be lexed as a token. 
BufferPtr = CurPtr; Result.SetFlag(LexerToken::LeadingSpace); + return true; } //===----------------------------------------------------------------------===// @@ -920,6 +956,9 @@ bool Lexer::LexEndOfFile(LexerToken &Result, const char *CurPtr) { Result.SetKind(tok::eom); // Update the location of token as well as BufferPtr. FormTokenWithChars(Result, CurPtr); + + // Restore comment saving mode, in case it was disabled for directive. + KeepCommentMode = Features.KeepComments; return true; // Have a token. } @@ -1035,6 +1074,9 @@ LexNextToken: // Done parsing the "line". ParsingPreprocessorDirective = false; + // Restore comment saving mode, in case it was disabled for directive. + KeepCommentMode = Features.KeepComments; + // Since we consumed a newline, we are back at the start of a line. IsAtStartOfLine = true; @@ -1211,13 +1253,13 @@ LexNextToken: // 6.4.9: Comments Char = getCharAndSize(CurPtr, SizeTmp); if (Char == '/') { // BCPL comment. - Result.SetFlag(LexerToken::LeadingSpace); - SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)); - goto LexNextToken; // GCC isn't tail call eliminating. + if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) + goto LexNextToken; // GCC isn't tail call eliminating. + return; // KeepCommentMode } else if (Char == '*') { // /**/ comment. - Result.SetFlag(LexerToken::LeadingSpace); - SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)); - goto LexNextToken; // GCC isn't tail call eliminating. + if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) + goto LexNextToken; // GCC isn't tail call eliminating. 
+ return; // KeepCommentMode } else if (Char == '=') { Result.SetKind(tok::slashequal); CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); diff --git a/clang/Lex/Preprocessor.cpp b/clang/Lex/Preprocessor.cpp index b6c6bc3610d..229aaacd55c 100644 --- a/clang/Lex/Preprocessor.cpp +++ b/clang/Lex/Preprocessor.cpp @@ -755,6 +755,10 @@ MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(LexerToken &MacroName, return 0; } // Otherwise, continue to add the tokens to this variable argument. + } else if (Tok.getKind() == tok::comment && !Features.KeepMacroComments) { + // If this is a comment token in the argument list and we're just in + // -C mode (not -CC mode), discard the comment. + continue; } ArgTokens.push_back(Tok); @@ -1221,6 +1225,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // directive mode. Tell the lexer this so any newlines we see will be // converted into an EOM token (this terminates the macro). CurLexer->ParsingPreprocessorDirective = true; + CurLexer->KeepCommentMode = false; + // Read the next token, the directive flavor. LexUnexpandedToken(Tok); @@ -1229,6 +1235,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, // something bogus), skip it. if (Tok.getKind() != tok::identifier) { CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. + CurLexer->KeepCommentMode = Features.KeepComments; continue; } @@ -1242,6 +1250,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, if (FirstChar >= 'a' && FirstChar <= 'z' && FirstChar != 'i' && FirstChar != 'e') { CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. + CurLexer->KeepCommentMode = Features.KeepComments; continue; } @@ -1261,6 +1271,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, IdLen = DirectiveStr.size(); if (IdLen >= 20) { CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. 
+ CurLexer->KeepCommentMode = Features.KeepComments; continue; } memcpy(Directive, &DirectiveStr[0], IdLen); @@ -1339,6 +1351,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, } CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. + CurLexer->KeepCommentMode = Features.KeepComments; } // Finally, if we are out of the conditional (saw an #endif or ran off the end @@ -1698,6 +1712,10 @@ void Preprocessor::HandleDefineDirective(LexerToken &DefineTok) { if (MacroNameTok.getKind() == tok::eom) return; + // If we are supposed to keep comments in #defines, reenable comment saving + // mode. + CurLexer->KeepCommentMode = Features.KeepMacroComments; + MacroInfo *MI = new MacroInfo(MacroNameTok.getLocation()); LexerToken Tok; diff --git a/clang/README.txt b/clang/README.txt index 3a5604ca4de..e81f2e64cdb 100644 --- a/clang/README.txt +++ b/clang/README.txt @@ -67,7 +67,6 @@ Lexer: Preprocessor: * #assert/#unassert * #line / #file directives - * -C output mode in -E mode. * MSExtension: "L#param" stringizes to a wide string literal. Traditional Preprocessor: diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index ffb0e10b1cc..277e3305cb1 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -28,6 +28,9 @@ TOK(unknown) // Not a token. TOK(eof) // End of file. TOK(eom) // End of macro (end of line inside a macro). +// C99 6.4.9: Comments. +TOK(comment) // Comment (only in -E -C[C] mode) + // C99 6.4.2: Identifiers. TOK(identifier) // abcde123 diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index fb337db1756..cd7c5a9fe2d 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -26,23 +26,27 @@ class Preprocessor; class SourceBuffer; struct LangOptions { - unsigned Trigraphs : 1; // Trigraphs in source files. - unsigned BCPLComment : 1; // BCPL-style // comments. 
- unsigned DollarIdents : 1; // '$' allowed in identifiers. - unsigned Digraphs : 1; // When added to C? C99? - unsigned HexFloats : 1; // C99 Hexadecimal float constants. - unsigned C99 : 1; // C99 Support - unsigned Microsoft : 1; // Microsoft extensions. - unsigned CPlusPlus : 1; // C++ Support - unsigned CPPMinMax : 1; // C++ <?=, >?= tokens. - unsigned NoExtensions : 1; // All extensions are disabled, strict mode. - - unsigned ObjC1 : 1; // Objective C 1 support enabled. - unsigned ObjC2 : 1; // Objective C 2 support enabled (implies ObjC1). + unsigned Trigraphs : 1; // Trigraphs in source files. + unsigned BCPLComment : 1; // BCPL-style // comments. + unsigned DollarIdents : 1; // '$' allowed in identifiers. + unsigned Digraphs : 1; // When added to C? C99? + unsigned HexFloats : 1; // C99 Hexadecimal float constants. + unsigned C99 : 1; // C99 Support + unsigned Microsoft : 1; // Microsoft extensions. + unsigned CPlusPlus : 1; // C++ Support + unsigned CPPMinMax : 1; // C++ <?=, >?= tokens. + unsigned NoExtensions : 1; // All extensions are disabled, strict mode. + + unsigned ObjC1 : 1; // Objective C 1 support enabled. + unsigned ObjC2 : 1; // Objective C 2 support enabled. + + unsigned KeepComments : 1; // Keep comments ("-C") mode. + unsigned KeepMacroComments : 1; // Keep macro-exp comments ("-CC") mode. LangOptions() { Trigraphs = BCPLComment = DollarIdents = Digraphs = ObjC1 = ObjC2 = 0; C99 = Microsoft = CPlusPlus = CPPMinMax = NoExtensions = 0; + KeepComments = KeepMacroComments = 0; } }; @@ -87,6 +91,10 @@ class Lexer { /// on an unterminated '/*' comment. bool LexingRawMode; + /// KeepCommentMode - The lexer can optionally keep C & BCPL-style comments, + /// and return them as tokens. This is used for -C and -CC modes. + bool KeepCommentMode; + //===--------------------------------------------------------------------===// // Context that changes as the file is lexed. 
// NOTE: any state that mutates when in raw mode must have save/restore code @@ -353,9 +361,9 @@ private: bool LexEndOfFile (LexerToken &Result, const char *CurPtr); void SkipWhitespace (LexerToken &Result, const char *CurPtr); - void SkipBCPLComment (LexerToken &Result, const char *CurPtr); - void SkipBlockComment (LexerToken &Result, const char *CurPtr); - + bool SkipBCPLComment (LexerToken &Result, const char *CurPtr); + bool SkipBlockComment (LexerToken &Result, const char *CurPtr); + bool SaveBCPLComment (LexerToken &Result, const char *CurPtr); /// LexIncludeFilename - After the preprocessor has parsed a #include, lex and /// (potentially) macro expand the filename. If the sequence parsed is not |

