| author | Chris Lattner <sabre@nondot.org> | 2006-06-18 16:22:51 +0000 |
|---|---|---|
| committer | Chris Lattner <sabre@nondot.org> | 2006-06-18 16:22:51 +0000 |
| commit | d01e291332fe9174762ed053f150aa21bd931c02 (patch) | |
| tree | 843e207879a96bbc3874b7778c60f6341c3aad36 /clang/Lex/Lexer.cpp | |
| parent | de0b7f6a311fcb05e6178d5322f82d725cd89d4e (diff) | |
| download | bcm5719-llvm-d01e291332fe9174762ed053f150aa21bd931c02.tar.gz bcm5719-llvm-d01e291332fe9174762ed053f150aa21bd931c02.zip | |
Make a fundamental change to the way we represent the location of LexerTokens.
Now, instead of keeping a pointer to the start of the token in memory, we keep the
start of the token as a SourceLocation node. This means that each LexerToken knows
the full include stack it was created with, and that a LexerToken no longer depends
on a "CurLexer" member being around (previously, tokens were invalidated as soon as
the lexer that produced them was deallocated).
This simplifies several things and forces good cleanup elsewhere. The Preprocessor
is now the one that knows how to dump tokens/macros and how to get the spelling of
a token (it has all the context).
llvm-svn: 38551
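For orientation before the diff, here is a minimal sketch of the representational change described above. The struct and field names are illustrative assumptions, not the actual clang declarations of the period:

```cpp
#include <cstdint>

// Assumed layout: a SourceLocation packs a file ID plus an offset. Since the
// file ID keys into a table that records how each file was entered, a token
// holding one knows its full include stack without help from any live Lexer.
struct SourceLocation {
  uint32_t FileID = 0;
  uint32_t Offset = 0;
};

// Before: the token held a raw pointer into the lexer's buffer, so it could
// only be interpreted while the Lexer that produced it was still alive.
struct LexerTokenBefore {
  const char *Start;      // pointer into the buffer being lexed
  class Lexer *TheLexer;  // needed to turn Start into a real location
};

// After: the token is self-contained and survives the Lexer's deallocation.
struct LexerTokenAfter {
  SourceLocation Loc;     // include-stack-aware start of the token
  unsigned Length;        // replaces the old Start/End pointer pair
};
```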
Diffstat (limited to 'clang/Lex/Lexer.cpp')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | clang/Lex/Lexer.cpp | 190 |

1 file changed, 43 insertions, 147 deletions
```diff
diff --git a/clang/Lex/Lexer.cpp b/clang/Lex/Lexer.cpp
index eaf5ff099fb..e8572ac1653 100644
--- a/clang/Lex/Lexer.cpp
+++ b/clang/Lex/Lexer.cpp
@@ -32,7 +32,6 @@
 #include "clang/Basic/SourceBuffer.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/Config/alloca.h"
-#include <cassert>
 #include <cctype>
 #include <iostream>
 using namespace llvm;
@@ -64,37 +63,6 @@ Lexer::Lexer(const SourceBuffer *File, unsigned fileid, Preprocessor &pp)
 // LexerToken implementation.
 //===----------------------------------------------------------------------===//
 
-/// getSourceLocation - Return a source location identifier for the specified
-/// offset in the current file.
-SourceLocation LexerToken::getSourceLocation() const {
-  if (TheLexer)
-    return TheLexer->getSourceLocation(Start);
-  return SourceLocation();
-}
-
-
-/// dump - Print the token to stderr, used for debugging.
-///
-void LexerToken::dump(const LangOptions &Features, bool DumpFlags) const {
-  std::cerr << clang::tok::getTokenName(Kind) << " '";
-
-  if (needsCleaning())
-    std::cerr << Lexer::getSpelling(*this, Features);
-  else
-    std::cerr << std::string(getStart(), getEnd());
-  std::cerr << "'";
-
-  if (DumpFlags) {
-    std::cerr << "\t";
-    if (isAtStartOfLine())
-      std::cerr << " [StartOfLine]";
-    if (hasLeadingSpace())
-      std::cerr << " [LeadingSpace]";
-    if (needsCleaning())
-      std::cerr << " [Spelling='" << std::string(getStart(), getEnd()) << "']";
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // Character information.
 //===----------------------------------------------------------------------===//
@@ -153,6 +121,7 @@ static inline bool isNumberBody(unsigned char c) {
   return CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD);
 }
 
+
 //===----------------------------------------------------------------------===//
 // Diagnostics forwarding code.
 //===----------------------------------------------------------------------===//
@@ -225,8 +194,8 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L) {
 /// know that we can accumulate into Size, and that we have already incremented
 /// Ptr by Size bytes.
 ///
-/// When this method is updated, getCharAndSizeSlowNoWarn (below) should be
-/// updated to match.
+/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
+/// be updated to match.
 ///
 char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
                                LexerToken *Tok) {
@@ -289,13 +258,14 @@ Slash:
   return *Ptr;
 }
 
+
 /// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
 /// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size,
 /// and that we have already incremented Ptr by Size bytes.
 ///
-/// When this method is updated, getCharAndSizeSlow (above) should be updated to
-/// match.
-static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
+/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
+/// be updated to match.
+char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
                                      const LangOptions &Features) {
   // If we have a slash, look for an escaped newline.
   if (Ptr[0] == '\\') {
@@ -348,80 +318,6 @@ Slash:
   return *Ptr;
 }
 
-/// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever
-/// emit a warning.
-static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size,
-                                        const LangOptions &Features) {
-  // If this is not a trigraph and not a UCN or escaped newline, return
-  // quickly.
-  if (Ptr[0] != '?' && Ptr[0] != '\\') {
-    Size = 1;
-    return *Ptr;
-  }
-
-  Size = 0;
-  return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
-}
-
-
-/// getSpelling() - Return the 'spelling' of this token. The spelling of a
-/// token are the characters used to represent the token in the source file
-/// after trigraph expansion and escaped-newline folding. In particular, this
-/// wants to get the true, uncanonicalized, spelling of things like digraphs
-/// UCNs, etc.
-std::string Lexer::getSpelling(const LexerToken &Tok,
-                               const LangOptions &Features) {
-  assert(Tok.getStart() <= Tok.getEnd() && "Token character range is bogus!");
-
-  // If this token contains nothing interesting, return it directly.
-  if (!Tok.needsCleaning())
-    return std::string(Tok.getStart(), Tok.getEnd());
-
-  // Otherwise, hard case, relex the characters into the string.
-  std::string Result;
-  Result.reserve(Tok.getLength());
-
-  for (const char *Ptr = Tok.getStart(), *End = Tok.getEnd(); Ptr != End; ) {
-    unsigned CharSize;
-    Result.push_back(getCharAndSizeNoWarn(Ptr, CharSize, Features));
-    Ptr += CharSize;
-  }
-  assert(Result.size() != unsigned(Tok.getLength()) &&
-         "NeedsCleaning flag set on something that didn't need cleaning!");
-  return Result;
-}
-
-/// getSpelling - This method is used to get the spelling of a token into a
-/// preallocated buffer, instead of as an std::string. The caller is required
-/// to allocate enough space for the token, which is guaranteed to be at most
-/// Tok.End-Tok.Start bytes long. The actual length of the token is returned.
-unsigned Lexer::getSpelling(const LexerToken &Tok, char *Buffer,
-                            const LangOptions &Features) {
-  assert(Tok.getStart() <= Tok.getEnd() && "Token character range is bogus!");
-
-  // If this token contains nothing interesting, return it directly.
-  if (!Tok.needsCleaning()) {
-    unsigned Size = Tok.getLength();
-    memcpy(Buffer, Tok.getStart(), Size);
-    return Size;
-  }
-  // Otherwise, hard case, relex the characters into the string.
-  std::string Result;
-  Result.reserve(Tok.getLength());
-
-  char *OutBuf = Buffer;
-  for (const char *Ptr = Tok.getStart(), *End = Tok.getEnd(); Ptr != End; ) {
-    unsigned CharSize;
-    *OutBuf++ = getCharAndSizeNoWarn(Ptr, CharSize, Features);
-    Ptr += CharSize;
-  }
-  assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
-         "NeedsCleaning flag set on something that didn't need cleaning!");
-
-  return OutBuf-Buffer;
-}
-
-
 //===----------------------------------------------------------------------===//
 // Helper methods for lexing.
 //===----------------------------------------------------------------------===//
@@ -440,19 +336,20 @@ void Lexer::LexIdentifier(LexerToken &Result, const char *CurPtr) {
   // FIXME: universal chars.
   if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
 FinishIdentifier:
-    Result.SetEnd(BufferPtr = CurPtr);
+    const char *IdStart = BufferPtr, *IdEnd = CurPtr;
+    FormTokenWithChars(Result, CurPtr);
     Result.SetKind(tok::identifier);
 
     // Look up this token, see if it is a macro, or if it is a language keyword.
     const char *SpelledTokStart, *SpelledTokEnd;
     if (!Result.needsCleaning()) {
       // No cleaning needed, just use the characters from the lexed buffer.
-      SpelledTokStart = Result.getStart();
-      SpelledTokEnd = Result.getEnd();
+      SpelledTokStart = IdStart;
+      SpelledTokEnd = IdEnd;
     } else {
       // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
       char *TmpBuf = (char*)alloca(Result.getLength());
-      unsigned Size = getSpelling(Result, TmpBuf);
+      unsigned Size = PP.getSpelling(Result, TmpBuf);
       SpelledTokStart = TmpBuf;
       SpelledTokEnd = TmpBuf+Size;
     }
@@ -516,8 +413,8 @@ void Lexer::LexNumericConstant(LexerToken &Result, const char *CurPtr) {
 
   Result.SetKind(tok::numeric_constant);
 
-  // Update the end of token position as well as the BufferPtr instance var.
-  Result.SetEnd(BufferPtr = CurPtr);
+  // Update the location of token as well as BufferPtr.
+  FormTokenWithChars(Result, CurPtr);
 }
 
 /// LexStringLiteral - Lex the remainder of a string literal, after having
@@ -533,7 +430,7 @@ void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) {
       C = getAndAdvanceChar(CurPtr, Result);
     } else if (C == '\n' || C == '\r' ||             // Newline.
               (C == 0 && CurPtr-1 == BufferEnd)) {   // End of file.
-      Diag(Result.getStart(), diag::err_unterminated_string);
+      Diag(BufferPtr, diag::err_unterminated_string);
       BufferPtr = CurPtr-1;
       return LexTokenInternal(Result);
     } else if (C == 0) {
@@ -546,8 +443,8 @@ void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) {
 
   Result.SetKind(tok::string_literal);
 
-  // Update the end of token position as well as the BufferPtr instance var.
-  Result.SetEnd(BufferPtr = CurPtr);
+  // Update the location of the token as well as the BufferPtr instance var.
+  FormTokenWithChars(Result, CurPtr);
 }
 
 /// LexAngledStringLiteral - Lex the remainder of an angled string literal,
@@ -563,7 +460,7 @@ void Lexer::LexAngledStringLiteral(LexerToken &Result, const char *CurPtr) {
      C = getAndAdvanceChar(CurPtr, Result);
    } else if (C == '\n' || C == '\r' ||             // Newline.
               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
-      Diag(Result.getStart(), diag::err_unterminated_string);
+      Diag(BufferPtr, diag::err_unterminated_string);
       BufferPtr = CurPtr-1;
       return LexTokenInternal(Result);
     } else if (C == 0) {
@@ -576,8 +473,8 @@ void Lexer::LexAngledStringLiteral(LexerToken &Result, const char *CurPtr) {
 
   Result.SetKind(tok::angle_string_literal);
 
-  // Update the end of token position as well as the BufferPtr instance var.
-  Result.SetEnd(BufferPtr = CurPtr);
+  // Update the location of token as well as BufferPtr.
+  FormTokenWithChars(Result, CurPtr);
 }
 
 
@@ -589,7 +486,7 @@ void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) {
   // Handle the common case of 'x' and '\y' efficiently.
   char C = getAndAdvanceChar(CurPtr, Result);
   if (C == '\'') {
-    Diag(Result.getStart(), diag::err_empty_character);
+    Diag(BufferPtr, diag::err_empty_character);
     BufferPtr = CurPtr;
     return LexTokenInternal(Result);
   } else if (C == '\\') {
@@ -609,7 +506,7 @@ void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) {
       C = getAndAdvanceChar(CurPtr, Result);
    } else if (C == '\n' || C == '\r' ||             // Newline.
               (C == 0 && CurPtr-1 == BufferEnd)) {  // End of file.
-      Diag(Result.getStart(), diag::err_unterminated_char);
+      Diag(BufferPtr, diag::err_unterminated_char);
       BufferPtr = CurPtr-1;
       return LexTokenInternal(Result);
     } else if (C == 0) {
@@ -623,8 +520,8 @@ void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) {
 
   Result.SetKind(tok::char_constant);
 
-  // Update the end of token position as well as the BufferPtr instance var.
-  Result.SetEnd(BufferPtr = CurPtr);
+  // Update the location of token as well as BufferPtr.
+  FormTokenWithChars(Result, CurPtr);
 }
 
 /// SkipWhitespace - Efficiently skip over a series of whitespace characters.
@@ -663,11 +560,11 @@ void Lexer::SkipWhitespace(LexerToken &Result, const char *CurPtr) {
   // If the next token is obviously a // or /* */ comment, skip it efficiently
   // too (without going through the big switch stmt).
   if (Char == '/' && CurPtr[1] == '/') {
-    Result.SetStart(CurPtr);
+    BufferPtr = CurPtr;
     return SkipBCPLComment(Result, CurPtr+1);
   }
   if (Char == '/' && CurPtr[1] == '*') {
-    Result.SetStart(CurPtr);
+    BufferPtr = CurPtr;
    return SkipBlockComment(Result, CurPtr+2);
   }
   BufferPtr = CurPtr;
@@ -680,7 +577,7 @@ void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) {
   // If BCPL comments aren't explicitly enabled for this language, emit an
   // extension warning.
   if (!Features.BCPLComment) {
-    Diag(Result.getStart(), diag::ext_bcpl_comment);
+    Diag(BufferPtr, diag::ext_bcpl_comment);
 
     // Mark them enabled so we only emit one warning for this translation
     // unit.
@@ -830,7 +727,7 @@ void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
   // comments.
   unsigned char C = *CurPtr++;
   if (C == 0 && CurPtr == BufferEnd+1) {
-    Diag(Result.getStart(), diag::err_unterminated_block_comment);
+    Diag(BufferPtr, diag::err_unterminated_block_comment);
     BufferPtr = CurPtr-1;
     return;
   }
@@ -860,7 +757,7 @@ void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
         Diag(CurPtr-1, diag::nested_block_comment);
       }
     } else if (C == 0 && CurPtr == BufferEnd+1) {
-      Diag(Result.getStart(), diag::err_unterminated_block_comment);
+      Diag(BufferPtr, diag::err_unterminated_block_comment);
       // Note: the user probably forgot a */.  We could continue immediately
       // after the /*, but this would involve lexing a lot of what really is the
       // comment, which surely would confuse the parser.
@@ -906,20 +803,21 @@ void Lexer::LexIncludeFilename(LexerToken &Result) {
 
   // No filename?
   if (Result.getKind() == tok::eom) {
-    Diag(Result.getStart(), diag::err_pp_expects_filename);
+    PP.Diag(Result, diag::err_pp_expects_filename);
     return;
   }
 
-  // Verify that there is nothing after the filename, other than EOM.
+  // Verify that there is nothing after the filename, other than EOM.  Use the
+  // preprocessor to lex this in case lexing the filename entered a macro.
   LexerToken EndTok;
-  Lex(EndTok);
+  PP.Lex(EndTok);
 
   if (EndTok.getKind() != tok::eom) {
-    Diag(Result.getStart(), diag::err_pp_expects_filename);
+    PP.Diag(EndTok, diag::ext_pp_extra_tokens_at_eol, "#include");
 
     // Lex until the end of the preprocessor directive line.
     while (EndTok.getKind() != tok::eom)
-      Lex(EndTok);
+      PP.Lex(EndTok);
 
     Result.SetKind(tok::eom);
   }
@@ -935,8 +833,6 @@ std::string Lexer::ReadToEndOfLine() {
   // CurPtr - Cache BufferPtr in an automatic variable.
   const char *CurPtr = BufferPtr;
 
-  Tmp.SetStart(CurPtr);
-
   while (1) {
     char Char = getAndAdvanceChar(CurPtr, Tmp);
     switch (Char) {
@@ -977,14 +873,15 @@ void Lexer::LexEndOfFile(LexerToken &Result, const char *CurPtr) {
     // Done parsing the "line".
     ParsingPreprocessorDirective = false;
 
     Result.SetKind(tok::eom);
-    // Update the end of token position as well as the BufferPtr instance var.
-    Result.SetEnd(BufferPtr = CurPtr);
+    // Update the location of token as well as BufferPtr.
+    FormTokenWithChars(Result, CurPtr);
     return;
   }
 
   // If we are in a #if directive, emit an error.
   while (!ConditionalStack.empty()) {
-    Diag(ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional);
+    PP.Diag(ConditionalStack.back().IfLoc,
+            diag::err_pp_unterminated_conditional);
     ConditionalStack.pop_back();
   }
 
@@ -1011,7 +908,6 @@ LexNextToken:
   // CurPtr - Cache BufferPtr in an automatic variable.
   const char *CurPtr = BufferPtr;
 
-  Result.SetStart(CurPtr);
 
   unsigned SizeTmp, SizeTmp2;  // Temporaries for use in cases below.
 
@@ -1281,7 +1177,7 @@ LexNextToken:
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
     } else if (Features.CPPMinMax && Char == '?') {     // <?
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
-      Diag(Result.getStart(), diag::min_max_deprecated);
+      Diag(BufferPtr, diag::min_max_deprecated);
 
       if (getCharAndSize(CurPtr, SizeTmp) == '=') {     // <?=
         Result.SetKind(tok::lessquestionequal);
@@ -1308,7 +1204,7 @@ LexNextToken:
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
     } else if (Features.CPPMinMax && Char == '?') {
       CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
-      Diag(Result.getStart(), diag::min_max_deprecated);
+      Diag(BufferPtr, diag::min_max_deprecated);
 
       if (getCharAndSize(CurPtr, SizeTmp) == '=') {
         Result.SetKind(tok::greaterquestionequal);    // >?=
@@ -1418,6 +1314,6 @@ LexNextToken:
     goto LexNextToken;   // GCC isn't tail call eliminating.
   }
 
-  // Update the end of token position as well as the BufferPtr instance var.
-  Result.SetEnd(BufferPtr = CurPtr);
+  // Update the location of token as well as BufferPtr.
+  FormTokenWithChars(Result, CurPtr);
 }
```
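The recurring replacement of `Result.SetEnd(BufferPtr = CurPtr)` with `FormTokenWithChars(Result, CurPtr)` funnels token formation through a single helper. Below is a self-contained sketch of what such a helper plausibly does; the names and bodies are assumptions for illustration, not the actual clang sources:

```cpp
// Hypothetical sketch, assumed names: the helper bundles the three steps the
// old code did by hand at the end of every Lex* routine.
struct SourceLocation { unsigned FileID = 0, Offset = 0; };

struct LexerToken {
  SourceLocation Loc;  // include-stack-aware start of the token
  unsigned Length = 0; // replaces the old Start/End pointer pair
};

class Lexer {
  const char *BufferStart;  // start of the memory buffer being lexed
  const char *BufferPtr;    // start of the token currently being lexed
  unsigned FileID;          // identifies this buffer and its include stack

public:
  Lexer(const char *Buf, unsigned FID)
      : BufferStart(Buf), BufferPtr(Buf), FileID(FID) {}

  // Map a pointer into the buffer to an include-stack-aware location.
  SourceLocation getSourceLocation(const char *Ptr) const {
    return {FileID, unsigned(Ptr - BufferStart)};
  }

  // Form a token covering [BufferPtr, TokEnd): stamp its location and length,
  // then advance BufferPtr so the next token starts where this one ended.
  void FormTokenWithChars(LexerToken &Result, const char *TokEnd) {
    Result.Length = unsigned(TokEnd - BufferPtr);
    Result.Loc = getSourceLocation(BufferPtr);
    BufferPtr = TokEnd;
  }
};
```

This is also why the diagnostics in the diff switch from `Diag(Result.getStart(), ...)` to `Diag(BufferPtr, ...)`: with no pointer stored in the token, `BufferPtr` is the remaining in-lexer record of where the current token began.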

