diff options
author | Richard Smith <richard-llvm@metafoo.co.uk> | 2018-09-07 19:25:39 +0000 |
---|---|---|
committer | Richard Smith <richard-llvm@metafoo.co.uk> | 2018-09-07 19:25:39 +0000 |
commit | 8ed7776bc404719a43a9dc53283c7817ae5bfee0 (patch) | |
tree | 3ffabee761205086f5bf876b9614b0d8fe974f0f /clang | |
parent | ecf2e2fe312a5dd297e6c0c78b89c98cf1073b0d (diff) | |
download | bcm5719-llvm-8ed7776bc404719a43a9dc53283c7817ae5bfee0.tar.gz bcm5719-llvm-8ed7776bc404719a43a9dc53283c7817ae5bfee0.zip |
PR38870: Add warning for zero-width Unicode characters appearing in
identifiers.
llvm-svn: 341700
Diffstat (limited to 'clang')
-rw-r--r-- | clang/include/clang/Basic/DiagnosticLexKinds.td | 3 | ||||
-rw-r--r-- | clang/lib/Lex/Lexer.cpp | 21 | ||||
-rw-r--r-- | clang/test/Lexer/unicode.c | 7 |
3 files changed, 28 insertions, 3 deletions
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 1c960711bcc..8cf6d7e7c09 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -122,6 +122,9 @@ def ext_unicode_whitespace : ExtWarn< def warn_utf8_symbol_homoglyph : Warning< "treating Unicode character <U+%0> as identifier character rather than " "as '%1' symbol">, InGroup<DiagGroup<"unicode-homoglyph">>; +def warn_utf8_symbol_zero_width : Warning< + "identifier contains Unicode character <U+%0> that is invisible in " + "some environments">, InGroup<DiagGroup<"unicode-zero-width">>; def err_hex_escape_no_digits : Error< "\\%0 used with no following hex digits">; diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index e8588a771a4..6a69bb4974a 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -1510,8 +1510,17 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, bool operator<(HomoglyphPair R) const { return Character < R.Character; } }; static constexpr HomoglyphPair SortedHomoglyphs[] = { + {U'\u00ad', 0}, // SOFT HYPHEN {U'\u01c3', '!'}, // LATIN LETTER RETROFLEX CLICK {U'\u037e', ';'}, // GREEK QUESTION MARK + {U'\u200b', 0}, // ZERO WIDTH SPACE + {U'\u200c', 0}, // ZERO WIDTH NON-JOINER + {U'\u200d', 0}, // ZERO WIDTH JOINER + {U'\u2060', 0}, // WORD JOINER + {U'\u2061', 0}, // FUNCTION APPLICATION + {U'\u2062', 0}, // INVISIBLE TIMES + {U'\u2063', 0}, // INVISIBLE SEPARATOR + {U'\u2064', 0}, // INVISIBLE PLUS {U'\u2212', '-'}, // MINUS SIGN {U'\u2215', '/'}, // DIVISION SLASH {U'\u2216', '\\'}, // SET MINUS @@ -1521,6 +1530,7 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, {U'\u2236', ':'}, // RATIO {U'\u223c', '~'}, // TILDE OPERATOR {U'\ua789', ':'}, // MODIFIER LETTER COLON + {U'\ufeff', 0}, // ZERO WIDTH NO-BREAK SPACE {U'\uff01', '!'}, // FULLWIDTH EXCLAMATION MARK {U'\uff03', '#'}, // 
FULLWIDTH NUMBER SIGN {U'\uff04', '$'}, // FULLWIDTH DOLLAR SIGN @@ -1560,9 +1570,14 @@ static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, llvm::raw_svector_ostream CharOS(CharBuf); llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4); } - const char LooksLikeStr[] = {Homoglyph->LooksLike, 0}; - Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph) - << Range << CharBuf << LooksLikeStr; + if (Homoglyph->LooksLike) { + const char LooksLikeStr[] = {Homoglyph->LooksLike, 0}; + Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_homoglyph) + << Range << CharBuf << LooksLikeStr; + } else { + Diags.Report(Range.getBegin(), diag::warn_utf8_symbol_zero_width) + << Range << CharBuf; + } } } diff --git a/clang/test/Lexer/unicode.c b/clang/test/Lexer/unicode.c index 30e353fa797..bebab829880 100644 --- a/clang/test/Lexer/unicode.c +++ b/clang/test/Lexer/unicode.c @@ -38,3 +38,10 @@ int n; = 3; // expected-warning {{treating Unicode character <U+037E> as identi int *n꞉꞉v = &n;; // expected-warning 2{{treating Unicode character <U+A789> as identifier character rather than as ':' symbol}} // expected-warning@-1 {{treating Unicode character <U+037E> as identifier character rather than as ';' symbol}} int v=[=](auto){return~x;}(); // expected-warning 12{{treating Unicode character}} + +int xx; +// expected-warning@-1 {{identifier contains Unicode character <U+2060> that is invisible in some environments}} +// expected-warning@-2 {{identifier contains Unicode character <U+FEFF> that is invisible in some environments}} +// expected-warning@-3 {{identifier contains Unicode character <U+200D> that is invisible in some environments}} +int foobar = 0; // expected-warning {{identifier contains Unicode character <U+200B> that is invisible in some environments}} +int x = foobar; // expected-error {{undeclared identifier}} |