diff options
Diffstat (limited to 'clang/lib/Lex/Lexer.cpp')
| -rw-r--r-- | clang/lib/Lex/Lexer.cpp | 150 |
1 files changed, 90 insertions, 60 deletions
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index c7eab490ad6..cfa835d173d 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1445,7 +1445,50 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, << Range; } } - } +} + +bool Lexer::tryConsumeIdentifierUCN(const char *&CurPtr, unsigned Size, + Token &Result) { + const char *UCNPtr = CurPtr + Size; + uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/0); + if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts)) + return false; + + if (!isLexingRawMode()) + maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, + makeCharRange(*this, CurPtr, UCNPtr), + /*IsFirst=*/false); + + Result.setFlag(Token::HasUCN); + if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') || + (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U')) + CurPtr = UCNPtr; + else + while (CurPtr != UCNPtr) + (void)getAndAdvanceChar(CurPtr, Result); + return true; +} + +bool Lexer::tryConsumeIdentifierUTF8Char(const char *&CurPtr) { + const char *UnicodePtr = CurPtr; + UTF32 CodePoint; + ConversionResult Result = + llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr, + (const UTF8 *)BufferEnd, + &CodePoint, + strictConversion); + if (Result != conversionOK || + !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts)) + return false; + + if (!isLexingRawMode()) + maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, + makeCharRange(*this, CurPtr, UnicodePtr), + /*IsFirst=*/false); + + CurPtr = UnicodePtr; + return true; +} bool Lexer::LexIdentifier(Token &Result, const char *CurPtr) { // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$] @@ -1500,47 +1543,10 @@ FinishIdentifier: C = getCharAndSize(CurPtr, Size); continue; - } else if (C == '\\') { - const char *UCNPtr = CurPtr + Size; - uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr, /*Token=*/0); - if (CodePoint == 0 || !isAllowedIDChar(CodePoint, LangOpts)) - goto FinishIdentifier; - - if (!isLexingRawMode()) { - maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, - makeCharRange(*this, CurPtr, UCNPtr), - /*IsFirst=*/false); - } - - Result.setFlag(Token::HasUCN); - if ((UCNPtr - CurPtr == 6 && CurPtr[1] == 'u') || - (UCNPtr - CurPtr == 10 && CurPtr[1] == 'U')) - CurPtr = UCNPtr; - else - while (CurPtr != UCNPtr) - (void)getAndAdvanceChar(CurPtr, Result); - + } else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) { C = getCharAndSize(CurPtr, Size); continue; - } else if (!isASCII(C)) { - const char *UnicodePtr = CurPtr; - UTF32 CodePoint; - ConversionResult Result = - llvm::convertUTF8Sequence((const UTF8 **)&UnicodePtr, - (const UTF8 *)BufferEnd, - &CodePoint, - strictConversion); - if (Result != conversionOK || - !isAllowedIDChar(static_cast<uint32_t>(CodePoint), LangOpts)) - goto FinishIdentifier; - - if (!isLexingRawMode()) { - maybeDiagnoseIDCharCompat(PP->getDiagnostics(), CodePoint, - makeCharRange(*this, CurPtr, UnicodePtr), - /*IsFirst=*/false); - } - - CurPtr = UnicodePtr; + } else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) { C = getCharAndSize(CurPtr, Size); continue; } else if (!isIdentifierBody(C)) { @@ -1576,7 +1582,7 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { unsigned Size; char C = getCharAndSize(CurPtr, Size); char PrevCh = 0; - while (isPreprocessingNumberBody(C)) { // FIXME: UCNs in ud-suffix. + while (isPreprocessingNumberBody(C)) { CurPtr = ConsumeChar(CurPtr, Size, Result); PrevCh = C; C = getCharAndSize(CurPtr, Size); @@ -1618,6 +1624,12 @@ bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) { } } + // If we have a UCN or UTF-8 character (perhaps in a ud-suffix), continue. + if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) + return LexNumericConstant(Result, CurPtr); + if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) + return LexNumericConstant(Result, CurPtr); + // Update the location of token as well as BufferPtr. const char *TokStart = BufferPtr; FormTokenWithChars(Result, CurPtr, tok::numeric_constant); @@ -1631,23 +1643,35 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr, bool IsStringLiteral) { assert(getLangOpts().CPlusPlus); - // Maximally munch an identifier. FIXME: UCNs. + // Maximally munch an identifier. unsigned Size; char C = getCharAndSize(CurPtr, Size); - if (isIdentifierHead(C)) { - if (!getLangOpts().CPlusPlus11) { - if (!isLexingRawMode()) - Diag(CurPtr, - C == '_' ? diag::warn_cxx11_compat_user_defined_literal - : diag::warn_cxx11_compat_reserved_user_defined_literal) - << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); + bool Consumed = false; + + if (!isIdentifierHead(C)) { + if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) + Consumed = true; + else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) + Consumed = true; + else return CurPtr; - } + } - // C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix - // that does not start with an underscore is ill-formed. As a conforming - // extension, we treat all such suffixes as if they had whitespace before - // them. + if (!getLangOpts().CPlusPlus11) { + if (!isLexingRawMode()) + Diag(CurPtr, + C == '_' ? diag::warn_cxx11_compat_user_defined_literal + : diag::warn_cxx11_compat_reserved_user_defined_literal) + << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); + return CurPtr; + } + + // C++11 [lex.ext]p10, [usrlit.suffix]p1: A program containing a ud-suffix + // that does not start with an underscore is ill-formed. As a conforming + // extension, we treat all such suffixes as if they had whitespace before + // them. We assume a suffix beginning with a UCN or UTF-8 character is more + // likely to be a ud-suffix than a macro, however, and accept that. + if (!Consumed) { bool IsUDSuffix = false; if (C == '_') IsUDSuffix = true; @@ -1685,16 +1709,22 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr, Diag(CurPtr, getLangOpts().MSVCCompat ? diag::ext_ms_reserved_user_defined_literal : diag::ext_reserved_user_defined_literal) - << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); + << FixItHint::CreateInsertion(getSourceLocation(CurPtr), " "); return CurPtr; } - Result.setFlag(Token::HasUDSuffix); - do { - CurPtr = ConsumeChar(CurPtr, Size, Result); - C = getCharAndSize(CurPtr, Size); - } while (isIdentifierBody(C)); + CurPtr = ConsumeChar(CurPtr, Size, Result); } + + Result.setFlag(Token::HasUDSuffix); + while (true) { + C = getCharAndSize(CurPtr, Size); + if (isIdentifierBody(C)) { CurPtr = ConsumeChar(CurPtr, Size, Result); } + else if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {} + else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {} + else break; + } + return CurPtr; } |

