summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Richard Smith <richard-llvm@metafoo.co.uk>, 2018-09-07 19:25:39 +0000
committer: Richard Smith <richard-llvm@metafoo.co.uk>, 2018-09-07 19:25:39 +0000
commit: 8ed7776bc404719a43a9dc53283c7817ae5bfee0 (patch)
tree: 3ffabee761205086f5bf876b9614b0d8fe974f0f
parent: ecf2e2fe312a5dd297e6c0c78b89c98cf1073b0d (diff)
download: bcm5719-llvm-8ed7776bc404719a43a9dc53283c7817ae5bfee0.tar.gz
download: bcm5719-llvm-8ed7776bc404719a43a9dc53283c7817ae5bfee0.zip
PR38870: Add warning for zero-width Unicode characters appearing in
identifiers.

llvm-svn: 341700
-rw-r--r-- clang/include/clang/Basic/DiagnosticLexKinds.td | 3
-rw-r--r-- clang/lib/Lex/Lexer.cpp | 21
-rw-r--r-- clang/test/Lexer/unicode.c | 7
3 files changed, 28 insertions, 3 deletions
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 1c960711bcc..8cf6d7e7c09 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -122,6 +122,9 @@ def ext_unicode_whitespace : ExtWarn<
def warn_utf8_symbol_homoglyph : Warning<
"treating Unicode character <U+%0> as identifier character rather than "
"as '%1' symbol">, InGroup<DiagGroup<"unicode-homoglyph">>;
+def warn_utf8_symbol_zero_width : Warning<
+ "identifier contains Unicode character <U+%0> that is invisible in "
+ "some environments">, InGroup<DiagGroup<"unicode-zero-width">>;
def err_hex_escape_no_digits : Error<
"\\%0 used with no following hex digits">;
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index e8588a771a4..6a69bb4974a 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1510,8 +1510,17 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,
bool operator<(HomoglyphPair R) const { return Character < R.Character; }
};
static constexpr HomoglyphPair SortedHomoglyphs[] = {
+ {U'\u00ad', 0}, // SOFT HYPHEN
{U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK
{U'\u037e', ';'}, // GREEK QUESTION MARK
+ {U'\u200b', 0}, // ZERO WIDTH SPACE
+ {U'\u200c', 0}, // ZERO WIDTH NON-JOINER
+ {U'\u200d', 0}, // ZERO WIDTH JOINER
+ {U'\u2060', 0}, // WORD JOINER
+ {U'\u2061', 0}, // FUNCTION APPLICATION
+ {U'\u2062', 0}, // INVISIBLE TIMES
+ {U'\u2063', 0}, // INVISIBLE SEPARATOR
+ {U'\u2064', 0}, // INVISIBLE PLUS
{U'\u2212', '-'}, // MINUS SIGN
{U'\u2215', '/'}, // DIVISION SLASH
{U'\u2216', '\\'}, // SET MINUS
@@ -1521,6 +1530,7 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,
{U'\u2236', ':'}, // RATIO
{U'\u223c', '~'}, // TILDE OPERATOR
{U'\ua789', ':'}, // MODIFIER LETTER COLON
+ {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE
{U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK
{U'\uff03', '#'}, // FULLWIDTH NUMBER SIGN
{U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN
@@ -1560,9 +1570,14 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C,
llvm::raw_svector_ostream CharOS(CharBuf);
llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4);
}
- const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
- Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
- << Range << CharBuf << LooksLikeStr;
+ if (Homoglyph->LooksLike) {
+ const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
+ Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph)
+ << Range << CharBuf << LooksLikeStr;
+ } else {
+ Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width)
+ << Range << CharBuf;
+ }
}
}
diff --git a/clang/test/Lexer/unicode.c b/clang/test/Lexer/unicode.c
index 30e353fa797..bebab829880 100644
--- a/clang/test/Lexer/unicode.c
+++ b/clang/test/Lexer/unicode.c
@@ -38,3 +38,10 @@ int n; = 3; // expected-warning {{treating Unicode character <U+037E> as identi
int *n꞉꞉v = &n;; // expected-warning 2{{treating Unicode character <U+A789> as identifier character rather than as ':' symbol}}
// expected-warning@-1 {{treating Unicode character <U+037E> as identifier character rather than as ';' symbol}}
int v=[=](auto){return~x;}(); // expected-warning 12{{treating Unicode character}}
+
+int ⁠xx‍;
+// expected-warning@-1 {{identifier contains Unicode character <U+2060> that is invisible in some environments}}
+// expected-warning@-2 {{identifier contains Unicode character <U+FEFF> that is invisible in some environments}}
+// expected-warning@-3 {{identifier contains Unicode character <U+200D> that is invisible in some environments}}
+int foo​bar = 0; // expected-warning {{identifier contains Unicode character <U+200B> that is invisible in some environments}}
+int x = foobar; // expected-error {{undeclared identifier}}
OpenPOWER on IntegriCloud