summaryrefslogtreecommitdiffstats
path: root/clang/lib/Lex/Lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/Lex/Lexer.cpp')
-rw-r--r--clang/lib/Lex/Lexer.cpp150
1 files changed, 90 insertions, 60 deletions
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index c7eab490ad6..cfa835d173d 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1445,7 +1445,50 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
<< Range;
}
}
- }
+}
+
+bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size,
+ Token &Result) {
+ const char *UCNPtr = CurPtr + Size;
+ uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/0);
+ if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts))
+ return false;
+
+ if (!isLexingRawMode())
+ maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
+ makeCharRange(*this, CurPtr, UCNPtr),
+ /*IsFirst=*/false);
+
+ Result.setFlag(Token::HasUCN);
+ if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') ||
+ (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U'))
+ CurPtr = UCNPtr;
+ else
+ while (CurPtr != UCNPtr)
+ (void)getAndAdvanceChar(CurPtr, Result);
+ return true;
+}
+
+bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) {
+ const char *UnicodePtr = CurPtr;
+ UTF32 CodePoint;
+ ConversionResult Result =
+ llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr,
+ (const UTF8 *)BufferEnd,
+ &CodePoint,
+ strictConversion);
+ if (Result != conversionOK ||
+ !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
+ return false;
+
+ if (!isLexingRawMode())
+ maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
+ makeCharRange(*this, CurPtr, UnicodePtr),
+ /*IsFirst=*/false);
+
+ CurPtr = UnicodePtr;
+ return true;
+}
bool Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
// Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
@@ -1500,47 +1543,10 @@ FinishIdentifier:
C = getCharAndSize(CurPtr, Size);
continue;
- } else if (C == '\\') {
- const char *UCNPtr = CurPtr + Size;
- uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/0);
- if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts))
- goto FinishIdentifier;
-
- if (!isLexingRawMode()) {
- maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
- makeCharRange(*this, CurPtr, UCNPtr),
- /*IsFirst=*/false);
- }
-
- Result.setFlag(Token::HasUCN);
- if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') ||
- (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U'))
- CurPtr = UCNPtr;
- else
- while (CurPtr != UCNPtr)
- (void)getAndAdvanceChar(CurPtr, Result);
-
+ } else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
C = getCharAndSize(CurPtr, Size);
continue;
- } else if (!isASCII(C)) {
- const char *UnicodePtr = CurPtr;
- UTF32 CodePoint;
- ConversionResult Result =
- llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr,
- (const UTF8 *)BufferEnd,
- &CodePoint,
- strictConversion);
- if (Result != conversionOK ||
- !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts))
- goto FinishIdentifier;
-
- if (!isLexingRawMode()) {
- maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint,
- makeCharRange(*this, CurPtr, UnicodePtr),
- /*IsFirst=*/false);
- }
-
- CurPtr = UnicodePtr;
+ } else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
C = getCharAndSize(CurPtr, Size);
continue;
} else if (!isIdentifierBody(C)) {
@@ -1576,7 +1582,7 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
unsigned Size;
char C = getCharAndSize(CurPtr, Size);
char PrevCh = 0;
- while (isPreprocessingNumberBody(C)) { // FIXME: UCNs in ud-suffix.
+ while (isPreprocessingNumberBody(C)) {
CurPtr = ConsumeChar(CurPtr, Size, Result);
PrevCh = C;
C = getCharAndSize(CurPtr, Size);
@@ -1618,6 +1624,12 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
}
}
+ // If we have a UCN or UTF-8 character (perhaps in a ud-suffix), continue.
+ if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
+ return LexNumericConstant(Result, CurPtr);
+ if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
+ return LexNumericConstant(Result, CurPtr);
+
// Update the location of token as well as BufferPtr.
const char *TokStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
@@ -1631,23 +1643,35 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
bool IsStringLiteral) {
assert(getLangOpts().CPlusPlus);
- // Maximally munch an identifier. FIXME: UCNs.
+ // Maximally munch an identifier.
unsigned Size;
char C = getCharAndSize(CurPtr, Size);
- if (isIdentifierHead(C)) {
- if (!getLangOpts().CPlusPlus11) {
- if (!isLexingRawMode())
- Diag(CurPtr,
- C == '_' ? diag::warn_cxx11_compat_user_defined_literal
- : diag::warn_cxx11_compat_reserved_user_defined_literal)
- << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
+ bool Consumed = false;
+
+ if (!isIdentifierHead(C)) {
+ if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
+ Consumed = true;
+ else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
+ Consumed = true;
+ else
return CurPtr;
- }
+ }
- // C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix
- // that does not start with an underscore is ill-formed. As a conforming
- // extension, we treat all such suffixes as if they had whitespace before
- // them.
+ if (!getLangOpts().CPlusPlus11) {
+ if (!isLexingRawMode())
+ Diag(CurPtr,
+ C == '_' ? diag::warn_cxx11_compat_user_defined_literal
+ : diag::warn_cxx11_compat_reserved_user_defined_literal)
+ << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
+ return CurPtr;
+ }
+
+ // C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix
+ // that does not start with an underscore is ill-formed. As a conforming
+ // extension, we treat all such suffixes as if they had whitespace before
+ // them. We assume a suffix beginning with a UCN or UTF-8 character is more
+ // likely to be a ud-suffix than a macro, however, and accept that.
+ if (!Consumed) {
bool IsUDSuffix = false;
if (C == '_')
IsUDSuffix = true;
@@ -1685,16 +1709,22 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
Diag(CurPtr, getLangOpts().MSVCCompat
? diag::ext_ms_reserved_user_defined_literal
: diag::ext_reserved_user_defined_literal)
- << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
+ << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " ");
return CurPtr;
}
- Result.setFlag(Token::HasUDSuffix);
- do {
- CurPtr = ConsumeChar(CurPtr, Size, Result);
- C = getCharAndSize(CurPtr, Size);
- } while (isIdentifierBody(C));
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
}
+
+ Result.setFlag(Token::HasUDSuffix);
+ while (true) {
+ C = getCharAndSize(CurPtr, Size);
+ if (isIdentifierBody(C)) { CurPtr = ConsumeChar(CurPtr, Size, Result); }
+ else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
+ else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
+ else break;
+ }
+
return CurPtr;
}
OpenPOWER on IntegriCloud