diff options
| author | tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4> | 2001-06-20 16:21:24 +0000 |
|---|---|---|
| committer | tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4> | 2001-06-20 16:21:24 +0000 |
| commit | 7f0fcd7a679c2ccbd25f6eaf1daf1f0a86f15f06 (patch) | |
| tree | 532ce701f09afb858ec0ca8628468260e7cbbb80 | |
| parent | 6716938025a7f760caab8c50164446f9bc52b81d (diff) | |
| download | ppe42-gcc-7f0fcd7a679c2ccbd25f6eaf1daf1f0a86f15f06.tar.gz ppe42-gcc-7f0fcd7a679c2ccbd25f6eaf1daf1f0a86f15f06.zip | |
* lex.c (java_read_char): Disallow invalid and overlong
sequences. Fixes PR java/2319.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@43475 138bc75d-0d04-0410-961f-82ee72b054a4
| -rw-r--r-- | gcc/java/ChangeLog | 5 | ||||
| -rw-r--r-- | gcc/java/lex.c | 33 |
2 files changed, 28 insertions, 10 deletions
```diff
diff --git a/gcc/java/ChangeLog b/gcc/java/ChangeLog
index f55ad6cf168..96e1ba5743c 100644
--- a/gcc/java/ChangeLog
+++ b/gcc/java/ChangeLog
@@ -1,3 +1,8 @@
+2001-06-19 Tom Tromey <tromey@redhat.com>
+
+        * lex.c (java_read_char): Disallow invalid and overlong
+        sequences. Fixes PR java/2319.
+
 2001-06-05 Jeff Sturm <jsturm@one-point.com>
 
         * decl.c (create_primitive_vtable): Don't call make_decl_rtl.
diff --git a/gcc/java/lex.c b/gcc/java/lex.c
index 28a73e3874b..35cd31749ca 100644
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -454,15 +454,21 @@ java_read_char (lex)
       if (c == EOF)
         return UEOF;
       if (c < 128)
-        return (unicode_t)c;
+        return (unicode_t) c;
       else
         {
           if ((c & 0xe0) == 0xc0)
             {
               c1 = getc (lex->finput);
               if ((c1 & 0xc0) == 0x80)
-                return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
-              c = c1;
+                {
+                  unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
+                  /* Check for valid 2-byte characters. We explicitly
+                     allow \0 because this encoding is common in the
+                     Java world. */
+                  if (r == 0 || (r >= 0x80 && r <= 0x7ff))
+                    return r;
+                }
             }
           else if ((c & 0xf0) == 0xe0)
             {
@@ -471,16 +477,23 @@ java_read_char (lex)
                 {
                   c2 = getc (lex->finput);
                   if ((c2 & 0xc0) == 0x80)
-                    return (unicode_t)(((c & 0xf) << 12) +
-                                       (( c1 & 0x3f) << 6) + (c2 & 0x3f));
-                  else
-                    c = c2;
+                    {
+                      unicode_t r = (unicode_t)(((c & 0xf) << 12) +
+                                                (( c1 & 0x3f) << 6)
+                                                + (c2 & 0x3f));
+                      /* Check for valid 3-byte characters.
+                         Don't allow surrogate, \ufffe or \uffff. */
+                      if (r >= 0x800 && r <= 0xffff
+                          && ! (r >= 0xd800 && r <= 0xdfff)
+                          && r != 0xfffe && r != 0xffff)
+                        return r;
+                    }
                 }
-              else
-                c = c1;
             }
 
-          /* We simply don't support invalid characters. */
+          /* We simply don't support invalid characters. We also
+             don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
+             cannot be valid Java characters. */
           java_lex_error ("malformed UTF-8 character", 0);
         }
     }
```

