Don't warn about Unicode characters in -E mode.

People use the C preprocessor for things other than C files, and some of those files contain Unicode characters. We shouldn't warn about Unicode characters appearing outside of identifiers in this case. There is currently no way for the preprocessor to tell whether it is in -E mode, so I added a new flag, derived from the PreprocessorOutputOptions. For now this flag is used only by the Unicode warnings, but it could conceivably be used by other warnings, or even for behavioral differences, later. <rdar://problem/13107323> llvm-svn: 173881
author: Jordan Rose <jordan_rose@apple.com> 2013-01-30 01:52:57 +0000
committer: Jordan Rose <jordan_rose@apple.com> 2013-01-30 01:52:57 +0000
commit: 17441589c324f88c95e98c7e5d8d5e55cdd7c949 (patch)
tree: ab02a8bb2bf16030d81bed1f8e093fc974d19293 /clang/lib/Lex/Lexer.cpp
parent: 1105821f55d85c49040934eb842b25e124b01766 (diff)
download: bcm5719-llvm-17441589c324f88c95e98c7e5d8d5e55cdd7c949.tar.gz
bcm5719-llvm-17441589c324f88c95e98c7e5d8d5e55cdd7c949.zip
1 file changed, 20 insertions, 18 deletions
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 3e3aaae5f5f..08f406b0691 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2811,14 +2811,13 @@ static bool isUnicodeWhitespace(uint32_t C) {
 }
 
 void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
-  if (isUnicodeWhitespace(C)) {
-    if (!isLexingRawMode()) {
-      CharSourceRange CharRange =
-        CharSourceRange::getCharRange(getSourceLocation(),
-                                      getSourceLocation(CurPtr));
-      Diag(BufferPtr, diag::ext_unicode_whitespace)
-        << CharRange;
-    }
+  if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
+      isUnicodeWhitespace(C)) {
+    CharSourceRange CharRange =
+      CharSourceRange::getCharRange(getSourceLocation(),
+                                    getSourceLocation(CurPtr));
+    Diag(BufferPtr, diag::ext_unicode_whitespace)
+      << CharRange;
 
     Result.setFlag(Token::LeadingSpace);
     if (SkipWhitespace(Result, CurPtr))
@@ -2832,7 +2831,8 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
     return LexIdentifier(Result, CurPtr);
   }
 
-  if (!isASCII(*BufferPtr) && !isAllowedIDChar(C)) {
+  if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
+      !isASCII(*BufferPtr) && !isAllowedIDChar(C)) {
     // Non-ASCII characters tend to creep into source code unintentionally.
     // Instead of letting the parser complain about the unknown token,
     // just drop the character.
@@ -2842,13 +2842,11 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
     // loophole in the mapping of Unicode characters to basic character set
     // characters that allows us to map these particular characters to, say,
     // whitespace.
-    if (!isLexingRawMode()) {
-      CharSourceRange CharRange =
-        CharSourceRange::getCharRange(getSourceLocation(),
-                                      getSourceLocation(CurPtr));
-      Diag(BufferPtr, diag::err_non_ascii)
-        << FixItHint::CreateRemoval(CharRange);
-    }
+    CharSourceRange CharRange =
+      CharSourceRange::getCharRange(getSourceLocation(),
+                                    getSourceLocation(CurPtr));
+    Diag(BufferPtr, diag::err_non_ascii)
+      << FixItHint::CreateRemoval(CharRange);
 
     BufferPtr = CurPtr;
     return LexTokenInternal(Result);
@@ -3537,11 +3535,15 @@ LexNextToken:
     if (Status == conversionOK)
       return LexUnicode(Result, CodePoint, CurPtr);
     
+    if (isLexingRawMode() || PP->isPreprocessedOutput()) {
+      Kind = tok::unknown;
+      break;
+    }
+
     // Non-ASCII characters tend to creep into source code unintentionally.
     // Instead of letting the parser complain about the unknown token,
     // just diagnose the invalid UTF-8, then drop the character.
-    if (!isLexingRawMode())
-      Diag(CurPtr, diag::err_invalid_utf8);
+    Diag(CurPtr, diag::err_invalid_utf8);
 
     BufferPtr = CurPtr+1;
     goto LexNextToken;
author	Jordan Rose <jordan_rose@apple.com>	2013-01-30 01:52:57 +0000
committer	Jordan Rose <jordan_rose@apple.com>	2013-01-30 01:52:57 +0000
commit	17441589c324f88c95e98c7e5d8d5e55cdd7c949 (patch)
tree	ab02a8bb2bf16030d81bed1f8e093fc974d19293 /clang/lib/Lex/Lexer.cpp
parent	1105821f55d85c49040934eb842b25e124b01766 (diff)
download	bcm5719-llvm-17441589c324f88c95e98c7e5d8d5e55cdd7c949.tar.gz bcm5719-llvm-17441589c324f88c95e98c7e5d8d5e55cdd7c949.zip