diff options
author | Jordan Rose <jordan_rose@apple.com> | 2013-01-30 01:52:57 +0000 |
---|---|---|
committer | Jordan Rose <jordan_rose@apple.com> | 2013-01-30 01:52:57 +0000 |
commit | 17441589c324f88c95e98c7e5d8d5e55cdd7c949 (patch) | |
tree | ab02a8bb2bf16030d81bed1f8e093fc974d19293 /clang/lib/Lex/Lexer.cpp | |
parent | 1105821f55d85c49040934eb842b25e124b01766 (diff) | |
download | bcm5719-llvm-17441589c324f88c95e98c7e5d8d5e55cdd7c949.tar.gz bcm5719-llvm-17441589c324f88c95e98c7e5d8d5e55cdd7c949.zip |
Don't warn about Unicode characters in -E mode.
People use the C preprocessor for things other than C files. Some of them
have Unicode characters. We shouldn't warn about Unicode characters
appearing outside of identifiers in this case.
There is currently no way for the preprocessor to tell whether it is in -E mode,
so I added a new flag, derived from the PreprocessorOutputOptions. This is
only used by the Unicode warnings for now, but could conceivably be used by
other warnings or even behavioral differences later.
<rdar://problem/13107323>
llvm-svn: 173881
Diffstat (limited to 'clang/lib/Lex/Lexer.cpp')
-rw-r--r-- | clang/lib/Lex/Lexer.cpp | 38 |
1 files changed, 20 insertions, 18 deletions
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 3e3aaae5f5f..08f406b0691 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2811,14 +2811,13 @@ static bool isUnicodeWhitespace(uint32_t C) { } void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { - if (isUnicodeWhitespace(C)) { - if (!isLexingRawMode()) { - CharSourceRange CharRange = - CharSourceRange::getCharRange(getSourceLocation(), - getSourceLocation(CurPtr)); - Diag(BufferPtr, diag::ext_unicode_whitespace) - << CharRange; - } + if (!isLexingRawMode() && !PP->isPreprocessedOutput() && + isUnicodeWhitespace(C)) { + CharSourceRange CharRange = + CharSourceRange::getCharRange(getSourceLocation(), + getSourceLocation(CurPtr)); + Diag(BufferPtr, diag::ext_unicode_whitespace) + << CharRange; Result.setFlag(Token::LeadingSpace); if (SkipWhitespace(Result, CurPtr)) @@ -2832,7 +2831,8 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { return LexIdentifier(Result, CurPtr); } - if (!isASCII(*BufferPtr) && !isAllowedIDChar(C)) { + if (!isLexingRawMode() && !PP->isPreprocessedOutput() && + !isASCII(*BufferPtr) && !isAllowedIDChar(C)) { // Non-ASCII characters tend to creep into source code unintentionally. // Instead of letting the parser complain about the unknown token, // just drop the character. @@ -2842,13 +2842,11 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) { // loophole in the mapping of Unicode characters to basic character set // characters that allows us to map these particular characters to, say, // whitespace. 
- if (!isLexingRawMode()) { - CharSourceRange CharRange = - CharSourceRange::getCharRange(getSourceLocation(), - getSourceLocation(CurPtr)); - Diag(BufferPtr, diag::err_non_ascii) - << FixItHint::CreateRemoval(CharRange); - } + CharSourceRange CharRange = + CharSourceRange::getCharRange(getSourceLocation(), + getSourceLocation(CurPtr)); + Diag(BufferPtr, diag::err_non_ascii) + << FixItHint::CreateRemoval(CharRange); BufferPtr = CurPtr; return LexTokenInternal(Result); @@ -3537,11 +3535,15 @@ LexNextToken: if (Status == conversionOK) return LexUnicode(Result, CodePoint, CurPtr); + if (isLexingRawMode() || PP->isPreprocessedOutput()) { + Kind = tok::unknown; + break; + } + // Non-ASCII characters tend to creep into source code unintentionally. // Instead of letting the parser complain about the unknown token, // just diagnose the invalid UTF-8, then drop the character. - if (!isLexingRawMode()) - Diag(CurPtr, diag::err_invalid_utf8); + Diag(CurPtr, diag::err_invalid_utf8); BufferPtr = CurPtr+1; goto LexNextToken; |