diff options
Diffstat (limited to 'clang/Lex/Lexer.cpp')
| -rw-r--r-- | clang/Lex/Lexer.cpp | 190 |
1 files changed, 43 insertions, 147 deletions
diff --git a/clang/Lex/Lexer.cpp b/clang/Lex/Lexer.cpp index eaf5ff099fb..e8572ac1653 100644 --- a/clang/Lex/Lexer.cpp +++ b/clang/Lex/Lexer.cpp @@ -32,7 +32,6 @@ #include "clang/Basic/SourceBuffer.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Config/alloca.h" -#include <cassert> #include <cctype> #include <iostream> using namespace llvm; @@ -64,37 +63,6 @@ Lexer::Lexer(const SourceBuffer *File, unsigned fileid, Preprocessor &pp) // LexerToken implementation. //===----------------------------------------------------------------------===// -/// getSourceLocation - Return a source location identifier for the specified -/// offset in the current file. -SourceLocation LexerToken::getSourceLocation() const { - if (TheLexer) - return TheLexer->getSourceLocation(Start); - return SourceLocation(); -} - - -/// dump - Print the token to stderr, used for debugging. -/// -void LexerToken::dump(const LangOptions &Features, bool DumpFlags) const { - std::cerr << clang::tok::getTokenName(Kind) << " '"; - - if (needsCleaning()) - std::cerr << Lexer::getSpelling(*this, Features); - else - std::cerr << std::string(getStart(), getEnd()); - std::cerr << "'"; - - if (DumpFlags) { - std::cerr << "\t"; - if (isAtStartOfLine()) - std::cerr << " [StartOfLine]"; - if (hasLeadingSpace()) - std::cerr << " [LeadingSpace]"; - if (needsCleaning()) - std::cerr << " [Spelling='" << std::string(getStart(), getEnd()) << "']"; - } -} - //===----------------------------------------------------------------------===// // Character information. //===----------------------------------------------------------------------===// @@ -153,6 +121,7 @@ static inline bool isNumberBody(unsigned char c) { return CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD); } + //===----------------------------------------------------------------------===// // Diagnostics forwarding code. //===----------------------------------------------------------------------===// @@ -225,8 +194,8 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L) { /// know that we can accumulate into Size, and that we have already incremented /// Ptr by Size bytes. /// -/// When this method is updated, getCharAndSizeSlowNoWarn (below) should be -/// updated to match. +/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should +/// be updated to match. /// char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, LexerToken *Tok) { @@ -289,13 +258,14 @@ Slash: return *Ptr; } + /// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the /// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size, /// and that we have already incremented Ptr by Size bytes. /// -/// When this method is updated, getCharAndSizeSlow (above) should be updated to -/// match. -static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, +/// NOTE: When this method is updated, getCharAndSizeSlow (above) should +/// be updated to match. +char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, const LangOptions &Features) { // If we have a slash, look for an escaped newline. if (Ptr[0] == '\\') { @@ -348,80 +318,6 @@ Slash: return *Ptr; } -/// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever -/// emit a warning. -static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, - const LangOptions &Features) { - // If this is not a trigraph and not a UCN or escaped newline, return - // quickly. - if (Ptr[0] != '?' && Ptr[0] != '\\') { - Size = 1; - return *Ptr; - } - - Size = 0; - return getCharAndSizeSlowNoWarn(Ptr, Size, Features); -} - - -/// getSpelling() - Return the 'spelling' of this token. The spelling of a -/// token are the characters used to represent the token in the source file -/// after trigraph expansion and escaped-newline folding. In particular, this -/// wants to get the true, uncanonicalized, spelling of things like digraphs -/// UCNs, etc. -std::string Lexer::getSpelling(const LexerToken &Tok, - const LangOptions &Features) { - assert(Tok.getStart() <= Tok.getEnd() && "Token character range is bogus!"); - - // If this token contains nothing interesting, return it directly. - if (!Tok.needsCleaning()) - return std::string(Tok.getStart(), Tok.getEnd()); - - // Otherwise, hard case, relex the characters into the string. - std::string Result; - Result.reserve(Tok.getLength()); - - for (const char *Ptr = Tok.getStart(), *End = Tok.getEnd(); Ptr != End; ) { - unsigned CharSize; - Result.push_back(getCharAndSizeNoWarn(Ptr, CharSize, Features)); - Ptr += CharSize; - } - assert(Result.size() != unsigned(Tok.getLength()) && - "NeedsCleaning flag set on something that didn't need cleaning!"); - return Result; -} - -/// getSpelling - This method is used to get the spelling of a token into a -/// preallocated buffer, instead of as an std::string. The caller is required -/// to allocate enough space for the token, which is guaranteed to be at most -/// Tok.End-Tok.Start bytes long. The actual length of the token is returned. -unsigned Lexer::getSpelling(const LexerToken &Tok, char *Buffer, - const LangOptions &Features) { - assert(Tok.getStart() <= Tok.getEnd() && "Token character range is bogus!"); - - // If this token contains nothing interesting, return it directly. - if (!Tok.needsCleaning()) { - unsigned Size = Tok.getLength(); - memcpy(Buffer, Tok.getStart(), Size); - return Size; - } - // Otherwise, hard case, relex the characters into the string. - std::string Result; - Result.reserve(Tok.getLength()); - - char *OutBuf = Buffer; - for (const char *Ptr = Tok.getStart(), *End = Tok.getEnd(); Ptr != End; ) { - unsigned CharSize; - *OutBuf++ = getCharAndSizeNoWarn(Ptr, CharSize, Features); - Ptr += CharSize; - } - assert(unsigned(OutBuf-Buffer) != Tok.getLength() && - "NeedsCleaning flag set on something that didn't need cleaning!"); - - return OutBuf-Buffer; -} - - //===----------------------------------------------------------------------===// // Helper methods for lexing. //===----------------------------------------------------------------------===// @@ -440,19 +336,20 @@ void Lexer::LexIdentifier(LexerToken &Result, const char *CurPtr) { // FIXME: universal chars. if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) { FinishIdentifier: - Result.SetEnd(BufferPtr = CurPtr); + const char *IdStart = BufferPtr, *IdEnd = CurPtr; + FormTokenWithChars(Result, CurPtr); Result.SetKind(tok::identifier); // Look up this token, see if it is a macro, or if it is a language keyword. const char *SpelledTokStart, *SpelledTokEnd; if (!Result.needsCleaning()) { // No cleaning needed, just use the characters from the lexed buffer. - SpelledTokStart = Result.getStart(); - SpelledTokEnd = Result.getEnd(); + SpelledTokStart = IdStart; + SpelledTokEnd = IdEnd; } else { // Cleaning needed, alloca a buffer, clean into it, then use the buffer. char *TmpBuf = (char*)alloca(Result.getLength()); - unsigned Size = getSpelling(Result, TmpBuf); + unsigned Size = PP.getSpelling(Result, TmpBuf); SpelledTokStart = TmpBuf; SpelledTokEnd = TmpBuf+Size; } @@ -516,8 +413,8 @@ void Lexer::LexNumericConstant(LexerToken &Result, const char *CurPtr) { Result.SetKind(tok::numeric_constant); - // Update the end of token position as well as the BufferPtr instance var. - Result.SetEnd(BufferPtr = CurPtr); + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); } /// LexStringLiteral - Lex the remainder of a string literal, after having lexed @@ -533,7 +430,7 @@ void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) { C = getAndAdvanceChar(CurPtr, Result); } else if (C == '\n' || C == '\r' || // Newline. (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. - Diag(Result.getStart(), diag::err_unterminated_string); + Diag(BufferPtr, diag::err_unterminated_string); BufferPtr = CurPtr-1; return LexTokenInternal(Result); } else if (C == 0) { @@ -546,8 +443,8 @@ void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) { Result.SetKind(tok::string_literal); - // Update the end of token position as well as the BufferPtr instance var. - Result.SetEnd(BufferPtr = CurPtr); + // Update the location of the token as well as the BufferPtr instance var. + FormTokenWithChars(Result, CurPtr); } /// LexAngledStringLiteral - Lex the remainder of an angled string literal, @@ -563,7 +460,7 @@ void Lexer::LexAngledStringLiteral(LexerToken &Result, const char *CurPtr) { C = getAndAdvanceChar(CurPtr, Result); } else if (C == '\n' || C == '\r' || // Newline. (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. - Diag(Result.getStart(), diag::err_unterminated_string); + Diag(BufferPtr, diag::err_unterminated_string); BufferPtr = CurPtr-1; return LexTokenInternal(Result); } else if (C == 0) { @@ -576,8 +473,8 @@ void Lexer::LexAngledStringLiteral(LexerToken &Result, const char *CurPtr) { Result.SetKind(tok::angle_string_literal); - // Update the end of token position as well as the BufferPtr instance var. - Result.SetEnd(BufferPtr = CurPtr); + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); } @@ -589,7 +486,7 @@ void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) { // Handle the common case of 'x' and '\y' efficiently. char C = getAndAdvanceChar(CurPtr, Result); if (C == '\'') { - Diag(Result.getStart(), diag::err_empty_character); + Diag(BufferPtr, diag::err_empty_character); BufferPtr = CurPtr; return LexTokenInternal(Result); } else if (C == '\\') { @@ -609,7 +506,7 @@ void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) { C = getAndAdvanceChar(CurPtr, Result); } else if (C == '\n' || C == '\r' || // Newline. (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. - Diag(Result.getStart(), diag::err_unterminated_char); + Diag(BufferPtr, diag::err_unterminated_char); BufferPtr = CurPtr-1; return LexTokenInternal(Result); } else if (C == 0) { @@ -623,8 +520,8 @@ void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) { Result.SetKind(tok::char_constant); - // Update the end of token position as well as the BufferPtr instance var. - Result.SetEnd(BufferPtr = CurPtr); + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); } /// SkipWhitespace - Efficiently skip over a series of whitespace characters. @@ -663,11 +560,11 @@ void Lexer::SkipWhitespace(LexerToken &Result, const char *CurPtr) { // If the next token is obviously a // or /* */ comment, skip it efficiently // too (without going through the big switch stmt). if (Char == '/' && CurPtr[1] == '/') { - Result.SetStart(CurPtr); + BufferPtr = CurPtr; return SkipBCPLComment(Result, CurPtr+1); } if (Char == '/' && CurPtr[1] == '*') { - Result.SetStart(CurPtr); + BufferPtr = CurPtr; return SkipBlockComment(Result, CurPtr+2); } BufferPtr = CurPtr; @@ -680,7 +577,7 @@ void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) { // If BCPL comments aren't explicitly enabled for this language, emit an // extension warning. if (!Features.BCPLComment) { - Diag(Result.getStart(), diag::ext_bcpl_comment); + Diag(BufferPtr, diag::ext_bcpl_comment); // Mark them enabled so we only emit one warning for this translation // unit. @@ -830,7 +727,7 @@ void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) { // comments. unsigned char C = *CurPtr++; if (C == 0 && CurPtr == BufferEnd+1) { - Diag(Result.getStart(), diag::err_unterminated_block_comment); + Diag(BufferPtr, diag::err_unterminated_block_comment); BufferPtr = CurPtr-1; return; } @@ -860,7 +757,7 @@ void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) { Diag(CurPtr-1, diag::nested_block_comment); } } else if (C == 0 && CurPtr == BufferEnd+1) { - Diag(Result.getStart(), diag::err_unterminated_block_comment); + Diag(BufferPtr, diag::err_unterminated_block_comment); // Note: the user probably forgot a */. We could continue immediately // after the /*, but this would involve lexing a lot of what really is the // comment, which surely would confuse the parser. @@ -906,20 +803,21 @@ void Lexer::LexIncludeFilename(LexerToken &Result) { // No filename? if (Result.getKind() == tok::eom) { - Diag(Result.getStart(), diag::err_pp_expects_filename); + PP.Diag(Result, diag::err_pp_expects_filename); return; } - // Verify that there is nothing after the filename, other than EOM. + // Verify that there is nothing after the filename, other than EOM. Use the + // preprocessor to lex this in case lexing the filename entered a macro. LexerToken EndTok; - Lex(EndTok); + PP.Lex(EndTok); if (EndTok.getKind() != tok::eom) { - Diag(Result.getStart(), diag::err_pp_expects_filename); + PP.Diag(EndTok, diag::ext_pp_extra_tokens_at_eol, "#include"); // Lex until the end of the preprocessor directive line. while (EndTok.getKind() != tok::eom) - Lex(EndTok); + PP.Lex(EndTok); Result.SetKind(tok::eom); } @@ -935,8 +833,6 @@ std::string Lexer::ReadToEndOfLine() { // CurPtr - Cache BufferPtr in an automatic variable. const char *CurPtr = BufferPtr; - Tmp.SetStart(CurPtr); - while (1) { char Char = getAndAdvanceChar(CurPtr, Tmp); switch (Char) { @@ -977,14 +873,15 @@ void Lexer::LexEndOfFile(LexerToken &Result, const char *CurPtr) { // Done parsing the "line". ParsingPreprocessorDirective = false; Result.SetKind(tok::eom); - // Update the end of token position as well as the BufferPtr instance var. - Result.SetEnd(BufferPtr = CurPtr); + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); return; } // If we are in a #if directive, emit an error. while (!ConditionalStack.empty()) { - Diag(ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional); + PP.Diag(ConditionalStack.back().IfLoc, + diag::err_pp_unterminated_conditional); ConditionalStack.pop_back(); } @@ -1011,7 +908,6 @@ LexNextToken: // CurPtr - Cache BufferPtr in an automatic variable. const char *CurPtr = BufferPtr; - Result.SetStart(CurPtr); unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below. @@ -1281,7 +1177,7 @@ LexNextToken: CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); } else if (Features.CPPMinMax && Char == '?') { // <? CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - Diag(Result.getStart(), diag::min_max_deprecated); + Diag(BufferPtr, diag::min_max_deprecated); if (getCharAndSize(CurPtr, SizeTmp) == '=') { // <?= Result.SetKind(tok::lessquestionequal); @@ -1308,7 +1204,7 @@ LexNextToken: CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); } else if (Features.CPPMinMax && Char == '?') { CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); - Diag(Result.getStart(), diag::min_max_deprecated); + Diag(BufferPtr, diag::min_max_deprecated); if (getCharAndSize(CurPtr, SizeTmp) == '=') { Result.SetKind(tok::greaterquestionequal); // >?= @@ -1418,6 +1314,6 @@ LexNextToken: goto LexNextToken; // GCC isn't tail call eliminating. } - // Update the end of token position as well as the BufferPtr instance var. - Result.SetEnd(BufferPtr = CurPtr); + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); } |

