summaryrefslogtreecommitdiffstats
path: root/clang/Lex/Lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/Lex/Lexer.cpp')
-rw-r--r--clang/Lex/Lexer.cpp190
1 files changed, 43 insertions, 147 deletions
diff --git a/clang/Lex/Lexer.cpp b/clang/Lex/Lexer.cpp
index eaf5ff099fb..e8572ac1653 100644
--- a/clang/Lex/Lexer.cpp
+++ b/clang/Lex/Lexer.cpp
@@ -32,7 +32,6 @@
#include "clang/Basic/SourceBuffer.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/Config/alloca.h"
-#include <cassert>
#include <cctype>
#include <iostream>
using namespace llvm;
@@ -64,37 +63,6 @@ Lexer::Lexer(const SourceBuffer *File, unsigned fileid, Preprocessor &pp)
// LexerToken implementation.
//===----------------------------------------------------------------------===//
-/// getSourceLocation - Return a source location identifier for the specified
-/// offset in the current file.
-SourceLocation LexerToken::getSourceLocation() const {
- if (TheLexer)
- return TheLexer->getSourceLocation(Start);
- return SourceLocation();
-}
-
-
-/// dump - Print the token to stderr, used for debugging.
-///
-void LexerToken::dump(const LangOptions &Features, bool DumpFlags) const {
- std::cerr << clang::tok::getTokenName(Kind) << " '";
-
- if (needsCleaning())
- std::cerr << Lexer::getSpelling(*this, Features);
- else
- std::cerr << std::string(getStart(), getEnd());
- std::cerr << "'";
-
- if (DumpFlags) {
- std::cerr << "\t";
- if (isAtStartOfLine())
- std::cerr << " [StartOfLine]";
- if (hasLeadingSpace())
- std::cerr << " [LeadingSpace]";
- if (needsCleaning())
- std::cerr << " [Spelling='" << std::string(getStart(), getEnd()) << "']";
- }
-}
-
//===----------------------------------------------------------------------===//
// Character information.
//===----------------------------------------------------------------------===//
@@ -153,6 +121,7 @@ static inline bool isNumberBody(unsigned char c) {
return CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD);
}
+
//===----------------------------------------------------------------------===//
// Diagnostics forwarding code.
//===----------------------------------------------------------------------===//
@@ -225,8 +194,8 @@ static char DecodeTrigraphChar(const char *CP, Lexer *L) {
/// know that we can accumulate into Size, and that we have already incremented
/// Ptr by Size bytes.
///
-/// When this method is updated, getCharAndSizeSlowNoWarn (below) should be
-/// updated to match.
+/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should
+/// be updated to match.
///
char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size,
LexerToken *Tok) {
@@ -289,13 +258,14 @@ Slash:
return *Ptr;
}
+
/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the
/// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size,
/// and that we have already incremented Ptr by Size bytes.
///
-/// When this method is updated, getCharAndSizeSlow (above) should be updated to
-/// match.
-static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
+/// NOTE: When this method is updated, getCharAndSizeSlow (above) should
+/// be updated to match.
+char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size,
const LangOptions &Features) {
// If we have a slash, look for an escaped newline.
if (Ptr[0] == '\\') {
@@ -348,80 +318,6 @@ Slash:
return *Ptr;
}
-/// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever
-/// emit a warning.
-static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size,
- const LangOptions &Features) {
- // If this is not a trigraph and not a UCN or escaped newline, return
- // quickly.
- if (Ptr[0] != '?' && Ptr[0] != '\\') {
- Size = 1;
- return *Ptr;
- }
-
- Size = 0;
- return getCharAndSizeSlowNoWarn(Ptr, Size, Features);
-}
-
-
-/// getSpelling() - Return the 'spelling' of this token. The spelling of a
-/// token are the characters used to represent the token in the source file
-/// after trigraph expansion and escaped-newline folding. In particular, this
-/// wants to get the true, uncanonicalized, spelling of things like digraphs
-/// UCNs, etc.
-std::string Lexer::getSpelling(const LexerToken &Tok,
- const LangOptions &Features) {
- assert(Tok.getStart() <= Tok.getEnd() && "Token character range is bogus!");
-
- // If this token contains nothing interesting, return it directly.
- if (!Tok.needsCleaning())
- return std::string(Tok.getStart(), Tok.getEnd());
-
- // Otherwise, hard case, relex the characters into the string.
- std::string Result;
- Result.reserve(Tok.getLength());
-
- for (const char *Ptr = Tok.getStart(), *End = Tok.getEnd(); Ptr != End; ) {
- unsigned CharSize;
- Result.push_back(getCharAndSizeNoWarn(Ptr, CharSize, Features));
- Ptr += CharSize;
- }
- assert(Result.size() != unsigned(Tok.getLength()) &&
- "NeedsCleaning flag set on something that didn't need cleaning!");
- return Result;
-}
-
-/// getSpelling - This method is used to get the spelling of a token into a
-/// preallocated buffer, instead of as an std::string. The caller is required
-/// to allocate enough space for the token, which is guaranteed to be at most
-/// Tok.End-Tok.Start bytes long. The actual length of the token is returned.
-unsigned Lexer::getSpelling(const LexerToken &Tok, char *Buffer,
- const LangOptions &Features) {
- assert(Tok.getStart() <= Tok.getEnd() && "Token character range is bogus!");
-
- // If this token contains nothing interesting, return it directly.
- if (!Tok.needsCleaning()) {
- unsigned Size = Tok.getLength();
- memcpy(Buffer, Tok.getStart(), Size);
- return Size;
- }
- // Otherwise, hard case, relex the characters into the string.
- std::string Result;
- Result.reserve(Tok.getLength());
-
- char *OutBuf = Buffer;
- for (const char *Ptr = Tok.getStart(), *End = Tok.getEnd(); Ptr != End; ) {
- unsigned CharSize;
- *OutBuf++ = getCharAndSizeNoWarn(Ptr, CharSize, Features);
- Ptr += CharSize;
- }
- assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
- "NeedsCleaning flag set on something that didn't need cleaning!");
-
- return OutBuf-Buffer;
-}
-
-
//===----------------------------------------------------------------------===//
// Helper methods for lexing.
//===----------------------------------------------------------------------===//
@@ -440,19 +336,20 @@ void Lexer::LexIdentifier(LexerToken &Result, const char *CurPtr) {
// FIXME: universal chars.
if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
FinishIdentifier:
- Result.SetEnd(BufferPtr = CurPtr);
+ const char *IdStart = BufferPtr, *IdEnd = CurPtr;
+ FormTokenWithChars(Result, CurPtr);
Result.SetKind(tok::identifier);
// Look up this token, see if it is a macro, or if it is a language keyword.
const char *SpelledTokStart, *SpelledTokEnd;
if (!Result.needsCleaning()) {
// No cleaning needed, just use the characters from the lexed buffer.
- SpelledTokStart = Result.getStart();
- SpelledTokEnd = Result.getEnd();
+ SpelledTokStart = IdStart;
+ SpelledTokEnd = IdEnd;
} else {
// Cleaning needed, alloca a buffer, clean into it, then use the buffer.
char *TmpBuf = (char*)alloca(Result.getLength());
- unsigned Size = getSpelling(Result, TmpBuf);
+ unsigned Size = PP.getSpelling(Result, TmpBuf);
SpelledTokStart = TmpBuf;
SpelledTokEnd = TmpBuf+Size;
}
@@ -516,8 +413,8 @@ void Lexer::LexNumericConstant(LexerToken &Result, const char *CurPtr) {
Result.SetKind(tok::numeric_constant);
- // Update the end of token position as well as the BufferPtr instance var.
- Result.SetEnd(BufferPtr = CurPtr);
+ // Update the location of token as well as BufferPtr.
+ FormTokenWithChars(Result, CurPtr);
}
/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
@@ -533,7 +430,7 @@ void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) {
C = getAndAdvanceChar(CurPtr, Result);
} else if (C == '\n' || C == '\r' || // Newline.
(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
- Diag(Result.getStart(), diag::err_unterminated_string);
+ Diag(BufferPtr, diag::err_unterminated_string);
BufferPtr = CurPtr-1;
return LexTokenInternal(Result);
} else if (C == 0) {
@@ -546,8 +443,8 @@ void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr) {
Result.SetKind(tok::string_literal);
- // Update the end of token position as well as the BufferPtr instance var.
- Result.SetEnd(BufferPtr = CurPtr);
+ // Update the location of the token as well as the BufferPtr instance var.
+ FormTokenWithChars(Result, CurPtr);
}
/// LexAngledStringLiteral - Lex the remainder of an angled string literal,
@@ -563,7 +460,7 @@ void Lexer::LexAngledStringLiteral(LexerToken &Result, const char *CurPtr) {
C = getAndAdvanceChar(CurPtr, Result);
} else if (C == '\n' || C == '\r' || // Newline.
(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
- Diag(Result.getStart(), diag::err_unterminated_string);
+ Diag(BufferPtr, diag::err_unterminated_string);
BufferPtr = CurPtr-1;
return LexTokenInternal(Result);
} else if (C == 0) {
@@ -576,8 +473,8 @@ void Lexer::LexAngledStringLiteral(LexerToken &Result, const char *CurPtr) {
Result.SetKind(tok::angle_string_literal);
- // Update the end of token position as well as the BufferPtr instance var.
- Result.SetEnd(BufferPtr = CurPtr);
+ // Update the location of token as well as BufferPtr.
+ FormTokenWithChars(Result, CurPtr);
}
@@ -589,7 +486,7 @@ void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) {
// Handle the common case of 'x' and '\y' efficiently.
char C = getAndAdvanceChar(CurPtr, Result);
if (C == '\'') {
- Diag(Result.getStart(), diag::err_empty_character);
+ Diag(BufferPtr, diag::err_empty_character);
BufferPtr = CurPtr;
return LexTokenInternal(Result);
} else if (C == '\\') {
@@ -609,7 +506,7 @@ void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) {
C = getAndAdvanceChar(CurPtr, Result);
} else if (C == '\n' || C == '\r' || // Newline.
(C == 0 && CurPtr-1 == BufferEnd)) { // End of file.
- Diag(Result.getStart(), diag::err_unterminated_char);
+ Diag(BufferPtr, diag::err_unterminated_char);
BufferPtr = CurPtr-1;
return LexTokenInternal(Result);
} else if (C == 0) {
@@ -623,8 +520,8 @@ void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) {
Result.SetKind(tok::char_constant);
- // Update the end of token position as well as the BufferPtr instance var.
- Result.SetEnd(BufferPtr = CurPtr);
+ // Update the location of token as well as BufferPtr.
+ FormTokenWithChars(Result, CurPtr);
}
/// SkipWhitespace - Efficiently skip over a series of whitespace characters.
@@ -663,11 +560,11 @@ void Lexer::SkipWhitespace(LexerToken &Result, const char *CurPtr) {
// If the next token is obviously a // or /* */ comment, skip it efficiently
// too (without going through the big switch stmt).
if (Char == '/' && CurPtr[1] == '/') {
- Result.SetStart(CurPtr);
+ BufferPtr = CurPtr;
return SkipBCPLComment(Result, CurPtr+1);
}
if (Char == '/' && CurPtr[1] == '*') {
- Result.SetStart(CurPtr);
+ BufferPtr = CurPtr;
return SkipBlockComment(Result, CurPtr+2);
}
BufferPtr = CurPtr;
@@ -680,7 +577,7 @@ void Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) {
// If BCPL comments aren't explicitly enabled for this language, emit an
// extension warning.
if (!Features.BCPLComment) {
- Diag(Result.getStart(), diag::ext_bcpl_comment);
+ Diag(BufferPtr, diag::ext_bcpl_comment);
// Mark them enabled so we only emit one warning for this translation
// unit.
@@ -830,7 +727,7 @@ void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
// comments.
unsigned char C = *CurPtr++;
if (C == 0 && CurPtr == BufferEnd+1) {
- Diag(Result.getStart(), diag::err_unterminated_block_comment);
+ Diag(BufferPtr, diag::err_unterminated_block_comment);
BufferPtr = CurPtr-1;
return;
}
@@ -860,7 +757,7 @@ void Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) {
Diag(CurPtr-1, diag::nested_block_comment);
}
} else if (C == 0 && CurPtr == BufferEnd+1) {
- Diag(Result.getStart(), diag::err_unterminated_block_comment);
+ Diag(BufferPtr, diag::err_unterminated_block_comment);
// Note: the user probably forgot a */. We could continue immediately
// after the /*, but this would involve lexing a lot of what really is the
// comment, which surely would confuse the parser.
@@ -906,20 +803,21 @@ void Lexer::LexIncludeFilename(LexerToken &Result) {
// No filename?
if (Result.getKind() == tok::eom) {
- Diag(Result.getStart(), diag::err_pp_expects_filename);
+ PP.Diag(Result, diag::err_pp_expects_filename);
return;
}
- // Verify that there is nothing after the filename, other than EOM.
+ // Verify that there is nothing after the filename, other than EOM. Use the
+ // preprocessor to lex this in case lexing the filename entered a macro.
LexerToken EndTok;
- Lex(EndTok);
+ PP.Lex(EndTok);
if (EndTok.getKind() != tok::eom) {
- Diag(Result.getStart(), diag::err_pp_expects_filename);
+ PP.Diag(EndTok, diag::ext_pp_extra_tokens_at_eol, "#include");
// Lex until the end of the preprocessor directive line.
while (EndTok.getKind() != tok::eom)
- Lex(EndTok);
+ PP.Lex(EndTok);
Result.SetKind(tok::eom);
}
@@ -935,8 +833,6 @@ std::string Lexer::ReadToEndOfLine() {
// CurPtr - Cache BufferPtr in an automatic variable.
const char *CurPtr = BufferPtr;
- Tmp.SetStart(CurPtr);
-
while (1) {
char Char = getAndAdvanceChar(CurPtr, Tmp);
switch (Char) {
@@ -977,14 +873,15 @@ void Lexer::LexEndOfFile(LexerToken &Result, const char *CurPtr) {
// Done parsing the "line".
ParsingPreprocessorDirective = false;
Result.SetKind(tok::eom);
- // Update the end of token position as well as the BufferPtr instance var.
- Result.SetEnd(BufferPtr = CurPtr);
+ // Update the location of token as well as BufferPtr.
+ FormTokenWithChars(Result, CurPtr);
return;
}
// If we are in a #if directive, emit an error.
while (!ConditionalStack.empty()) {
- Diag(ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional);
+ PP.Diag(ConditionalStack.back().IfLoc,
+ diag::err_pp_unterminated_conditional);
ConditionalStack.pop_back();
}
@@ -1011,7 +908,6 @@ LexNextToken:
// CurPtr - Cache BufferPtr in an automatic variable.
const char *CurPtr = BufferPtr;
- Result.SetStart(CurPtr);
unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below.
@@ -1281,7 +1177,7 @@ LexNextToken:
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else if (Features.CPPMinMax && Char == '?') { // <?
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
- Diag(Result.getStart(), diag::min_max_deprecated);
+ Diag(BufferPtr, diag::min_max_deprecated);
if (getCharAndSize(CurPtr, SizeTmp) == '=') { // <?=
Result.SetKind(tok::lessquestionequal);
@@ -1308,7 +1204,7 @@ LexNextToken:
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else if (Features.CPPMinMax && Char == '?') {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
- Diag(Result.getStart(), diag::min_max_deprecated);
+ Diag(BufferPtr, diag::min_max_deprecated);
if (getCharAndSize(CurPtr, SizeTmp) == '=') {
Result.SetKind(tok::greaterquestionequal); // >?=
@@ -1418,6 +1314,6 @@ LexNextToken:
goto LexNextToken; // GCC isn't tail call eliminating.
}
- // Update the end of token position as well as the BufferPtr instance var.
- Result.SetEnd(BufferPtr = CurPtr);
+ // Update the location of token as well as BufferPtr.
+ FormTokenWithChars(Result, CurPtr);
}
OpenPOWER on IntegriCloud