diff options
-rw-r--r-- | libjava/ChangeLog | 15 | ||||
-rw-r--r-- | libjava/gnu/java/nio/charset/ISO_8859_1.java | 25 | ||||
-rw-r--r-- | libjava/gnu/java/nio/charset/Provider.java | 47 | ||||
-rw-r--r-- | libjava/gnu/java/nio/charset/US_ASCII.java | 26 | ||||
-rw-r--r-- | libjava/gnu/java/nio/charset/UTF_16.java | 11 | ||||
-rw-r--r-- | libjava/gnu/java/nio/charset/UTF_16BE.java | 15 | ||||
-rw-r--r-- | libjava/gnu/java/nio/charset/UTF_16LE.java | 14 | ||||
-rw-r--r-- | libjava/gnu/java/nio/charset/UTF_8.java | 12 |
8 files changed, 139 insertions, 26 deletions
diff --git a/libjava/ChangeLog b/libjava/ChangeLog index e093db43caa..9de22810639 100644 --- a/libjava/ChangeLog +++ b/libjava/ChangeLog @@ -1,3 +1,18 @@ +2005-02-07 Robert Schuster <thebohemian@gmx.net> + + * gnu/java/nio/charset/ISO_8859_1.java, + gnu/java/nio/charset/US_ASCII.java, + gnu/java/nio/charset/UTF_16.java, + gnu/java/nio/charset/UTF_16LE.java, + gnu/java/nio/charset/UTF_16BE.java, + gnu/java/nio/charset/UTF_8.java: Fixed canonical names + and aliases according to + "http://www.iana.org/assignments/character-sets", + "http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html" + and "http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL". + * gnu/java/nio/charset/Provider.java: Made charset lookup + case-insensitive, which fixes bug #11740. + 2005-02-07 Tom Tromey <tromey@redhat.com> PR libgcj/19611: diff --git a/libjava/gnu/java/nio/charset/ISO_8859_1.java b/libjava/gnu/java/nio/charset/ISO_8859_1.java index a7fcb636353..45fe5cc6fba 100644 --- a/libjava/gnu/java/nio/charset/ISO_8859_1.java +++ b/libjava/gnu/java/nio/charset/ISO_8859_1.java @@ -1,5 +1,5 @@ /* ISO_8859_1.java -- - Copyright (C) 2002, 2004 Free Software Foundation, Inc. + Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc. This file is part of GNU Classpath. 
@@ -53,7 +53,28 @@ final class ISO_8859_1 extends Charset { ISO_8859_1 () { - super ("ISO-8859-1", new String[]{"ISO-LATIN-1"}); + /* Canonical charset name chosen according to: + * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html + */ + super ("ISO-8859-1", new String[] { + /* These names are provided by + * http://www.iana.org/assignments/character-sets + */ + "iso-ir-100", + "ISO_8859-1", + "latin1", + "l1", + "IBM819", + "CP819", + "csISOLatin1", + "8859_1", + /* These names are provided by + * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL + */ + "ISO8859_1", "ISO_8859_1", "ibm-819", "ISO_8859-1:1987", + "819" + }); + } public boolean contains (Charset cs) diff --git a/libjava/gnu/java/nio/charset/Provider.java b/libjava/gnu/java/nio/charset/Provider.java index 13f637113e5..1a5606813cb 100644 --- a/libjava/gnu/java/nio/charset/Provider.java +++ b/libjava/gnu/java/nio/charset/Provider.java @@ -1,5 +1,5 @@ /* Provider.java -- - Copyright (C) 2002 Free Software Foundation, Inc. + Copyright (C) 2002, 2005 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -48,6 +48,7 @@ import java.util.Iterator; * {@link Charset#charsetForName} and * {@link Charset#availableCharsets}. * * @author Jesse Rosenstock + * @author Robert Schuster (thebohemian@gmx.net) * @see Charset */ public final class Provider extends CharsetProvider @@ -63,12 +64,14 @@ public final class Provider extends CharsetProvider } /** - * Map from charset name to charset canonical name. + * Map from charset name to charset canonical name. The strings + * are all lower-case to allow case-insensitive retrieval of + * Charset instances. */ private final HashMap canonicalNames; /** - * Map from canonical name to Charset. + * Map from lower-case canonical name to Charset. * TODO: We may want to use soft references. We would then need to keep * track of the class name to regenerate the object. 
*/ @@ -76,8 +79,6 @@ public final class Provider extends CharsetProvider private Provider () { - // FIXME: We might need to make the name comparison case insensitive. - // Verify this with the Sun JDK. canonicalNames = new HashMap (); charsets = new HashMap (); @@ -106,24 +107,42 @@ public final class Provider extends CharsetProvider .iterator (); } + /** + * Returns a Charset instance by converting the given + * name to lower-case, looking up the canonical charset + * name and finally looking up the Charset with that name. + * + * <p>The lookup is therefore case-insensitive.</p> + * + * @return The Charset having <code>charsetName</code> + * as its alias or null if no such Charset exists. + */ public Charset charsetForName (String charsetName) { - return (Charset) charsets.get (canonicalize (charsetName)); - } - - private Object canonicalize (String charsetName) - { - Object o = canonicalNames.get (charsetName); - return o == null ? charsetName : o; + return (Charset) charsets.get(canonicalNames.get(charsetName.toLowerCase())); } + /** + * Puts a Charset under its canonical name into the 'charsets' map. + * Then puts a mapping from all its alias names to the canonical name. + * + * <p>All names are converted to lower-case.</p> + * + * @param cs + */ private void addCharset (Charset cs) { - String canonicalName = cs.name (); + String canonicalName = cs.name().toLowerCase(); charsets.put (canonicalName, cs); + + /* Adds a mapping from the canonical name + * to itself so that a lookup using that name + * is no special case. 
+ */ + canonicalNames.put(canonicalName, canonicalName); for (Iterator i = cs.aliases ().iterator (); i.hasNext (); ) - canonicalNames.put (i.next (), canonicalName); + canonicalNames.put (((String) i.next()).toLowerCase(), canonicalName); } public static synchronized Provider provider () diff --git a/libjava/gnu/java/nio/charset/US_ASCII.java b/libjava/gnu/java/nio/charset/US_ASCII.java index 9efb8ecf6f8..f2c66960b6d 100644 --- a/libjava/gnu/java/nio/charset/US_ASCII.java +++ b/libjava/gnu/java/nio/charset/US_ASCII.java @@ -1,5 +1,5 @@ /* US_ASCII.java -- - Copyright (C) 2002, 2004 Free Software Foundation, Inc. + Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -53,7 +53,29 @@ final class US_ASCII extends Charset { US_ASCII () { - super ("US-ASCII", new String[]{"ISO646-US"}); + /* Canonical charset name chosen according to: + * http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html + */ + super ("US-ASCII", new String[] { + /* These names are provided by + * http://www.iana.org/assignments/character-sets + */ + "iso-ir-6", + "ANSI_X3.4-1986", + "ISO_646.irv:1991", + "ASCII", + "ISO646-US", + "ASCII", + "us", + "IBM367", + "cp367", + "csASCII", + /* These names are provided by + * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL + */ + "ANSI_X3.4-1968", "iso_646.irv:1983", "ascii7", "646", + "windows-20127" + }); } public boolean contains (Charset cs) diff --git a/libjava/gnu/java/nio/charset/UTF_16.java b/libjava/gnu/java/nio/charset/UTF_16.java index aebd14608bd..1737101d75d 100644 --- a/libjava/gnu/java/nio/charset/UTF_16.java +++ b/libjava/gnu/java/nio/charset/UTF_16.java @@ -1,5 +1,5 @@ /* UTF_16.java -- - Copyright (C) 2002, 2004 Free Software Foundation, Inc. + Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc. This file is part of GNU Classpath. 
@@ -51,7 +51,14 @@ final class UTF_16 extends Charset { UTF_16 () { - super ("UTF-16", null); + super ("UTF-16", new String[] { + // witnessed by the internet + "UTF16", + /* These names are provided by + * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL + */ + "ISO-10646-UCS-2", "unicode", "csUnicode", "ucs-2" + }); } public boolean contains (Charset cs) diff --git a/libjava/gnu/java/nio/charset/UTF_16BE.java b/libjava/gnu/java/nio/charset/UTF_16BE.java index efd84fe3fd9..dc0d1369850 100644 --- a/libjava/gnu/java/nio/charset/UTF_16BE.java +++ b/libjava/gnu/java/nio/charset/UTF_16BE.java @@ -1,5 +1,5 @@ /* UTF_16BE.java -- - Copyright (C) 2002, 2004 Free Software Foundation, Inc. + Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -51,7 +51,18 @@ final class UTF_16BE extends Charset { UTF_16BE () { - super ("UTF-16BE", null); + super ("UTF-16BE", new String[] { + // witnessed by the internet + "UTF16BE", + /* These names are provided by + * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL + */ + "x-utf-16be", "ibm-1200", "ibm-1201", "ibm-5297", + "ibm-13488", "ibm-17584", "windows-1201", "cp1200", "cp1201", + "UTF16_BigEndian", + // see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html + "UnicodeBigUnmarked" + }); } public boolean contains (Charset cs) diff --git a/libjava/gnu/java/nio/charset/UTF_16LE.java b/libjava/gnu/java/nio/charset/UTF_16LE.java index 6eafbbcc4f1..87ceab3033e 100644 --- a/libjava/gnu/java/nio/charset/UTF_16LE.java +++ b/libjava/gnu/java/nio/charset/UTF_16LE.java @@ -1,5 +1,5 @@ /* UTF_16LE.java -- - Copyright (C) 2002, 2004 Free Software Foundation, Inc. + Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc. This file is part of GNU Classpath. 
@@ -51,7 +51,17 @@ final class UTF_16LE extends Charset { UTF_16LE () { - super ("UTF-16LE", null); + super ("UTF-16LE", new String[] { + // witnessed by the internet + "UTF16LE", + /* These names are provided by + * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL + */ + "x-utf-16le", "ibm-1202", "ibm-13490", "ibm-17586", + "UTF16_LittleEndian", + // see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html + "UnicodeLittleUnmarked" + }); } public boolean contains (Charset cs) diff --git a/libjava/gnu/java/nio/charset/UTF_8.java b/libjava/gnu/java/nio/charset/UTF_8.java index acb34e2bbce..c2714363949 100644 --- a/libjava/gnu/java/nio/charset/UTF_8.java +++ b/libjava/gnu/java/nio/charset/UTF_8.java @@ -1,5 +1,5 @@ /* UTF_8.java -- - Copyright (C) 2002, 2004 Free Software Foundation, Inc. + Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -62,7 +62,15 @@ final class UTF_8 extends Charset { UTF_8 () { - super ("UTF-8", null); + super ("UTF-8", new String[] { + /* These names are provided by + * http://oss.software.ibm.com/cgi-bin/icu/convexp?s=ALL + */ + "ibm-1208", "ibm-1209", "ibm-5304", "ibm-5305", + "windows-65001", "cp1208", + // see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html + "UTF8" + }); } public boolean contains (Charset cs) |