diff options
Diffstat (limited to 'libjava/java/text/RuleBasedCollator.java')
-rw-r--r-- | libjava/java/text/RuleBasedCollator.java | 1017 |
1 files changed, 0 insertions, 1017 deletions
diff --git a/libjava/java/text/RuleBasedCollator.java b/libjava/java/text/RuleBasedCollator.java deleted file mode 100644 index ae84a41032d..00000000000 --- a/libjava/java/text/RuleBasedCollator.java +++ /dev/null @@ -1,1017 +0,0 @@ -/* RuleBasedCollator.java -- Concrete Collator Class - Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. - -This file is part of GNU Classpath. - -GNU Classpath is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU Classpath is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU Classpath; see the file COPYING. If not, write to the -Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301 USA. - -Linking this library statically or dynamically with other modules is -making a combined work based on this library. Thus, the terms and -conditions of the GNU General Public License cover the whole -combination. - -As a special exception, the copyright holders of this library give you -permission to link this library with independent modules to produce an -executable, regardless of the license terms of these independent -modules, and to copy and distribute the resulting executable under -terms of your choice, provided that you also meet, for each linked -independent module, the terms and conditions of the license of that -module. An independent module is a module which is not derived from -or based on this library. If you modify this library, you may extend -this exception to your version of the library, but you are not -obligated to do so. If you do not wish to do so, delete this -exception statement from your version. */ - - -package java.text; - -import java.util.ArrayList; -import java.util.HashMap; - -/* Written using "Java Class Libraries", 2nd edition, plus online - * API docs for JDK 1.2 from http://www.javasoft.com. - * Status: Believed complete and correct - */ - -/** - * This class is a concrete subclass of <code>Collator</code> suitable - * for string collation in a wide variety of languages. An instance of - * this class is normally returned by the <code>getInstance</code> method - * of <code>Collator</code> with rules predefined for the requested - * locale. However, an instance of this class can be created manually - * with any desired rules. - * <p> - * Rules take the form of a <code>String</code> with the following syntax - * <ul> - * <li> Modifier: '@'</li> - * <li> Relation: '<' | ';' | ',' | '=' : <text></li> - * <li> Reset: '&' : <text></li> - * </ul> - * The modifier character indicates that accents sort backward as is the - * case with French. The modifier applies to all rules <b>after</b> - * the modifier but before the next primary sequence. If placed at the end - * of the sequence if applies to all unknown accented character. - * The relational operators specify how the text - * argument relates to the previous term. The relation characters have - * the following meanings: - * <ul> - * <li>'<' - The text argument is greater than the prior term at the primary - * difference level.</li> - * <li>';' - The text argument is greater than the prior term at the secondary - * difference level.</li> - * <li>',' - The text argument is greater than the prior term at the tertiary - * difference level.</li> - * <li>'=' - The text argument is equal to the prior term</li> - * </ul> - * <p> - * As for the text argument itself, this is any sequence of Unicode - * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F, - * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are - * desired, they must be enclosed in single quotes. If any whitespace is - * encountered, it is ignored. (For example, "a b" is equal to "ab"). - * <p> - * The reset operation inserts the following rule at the point where the - * text argument to it exists in the previously declared rule string. This - * makes it easy to add new rules to an existing string by simply including - * them in a reset sequence at the end. Note that the text argument, or - * at least the first character of it, must be present somewhere in the - * previously declared rules in order to be inserted properly. If this - * is not satisfied, a <code>ParseException</code> will be thrown. - * <p> - * This system of configuring <code>RuleBasedCollator</code> is needlessly - * complex and the people at Taligent who developed it (along with the folks - * at Sun who accepted it into the Java standard library) deserve a slow - * and agonizing death. - * <p> - * Here are a couple of example of rule strings: - * <p> - * "< a < b < c" - This string says that a is greater than b which is - * greater than c, with all differences being primary differences. - * <p> - * "< a,A < b,B < c,C" - This string says that 'A' is greater than 'a' with - * a tertiary strength comparison. Both 'b' and 'B' are greater than 'a' and - * 'A' during a primary strength comparison. But 'B' is greater than 'b' - * under a tertiary strength comparison. - * <p> - * "< a < c & a < b " - This sequence is identical in function to the - * "< a < b < c" rule string above. The '&' reset symbol indicates that - * the rule "< b" is to be inserted after the text argument "a" in the - * previous rule string segment. - * <p> - * "< a < b & y < z" - This is an error. The character 'y' does not appear - * anywhere in the previous rule string segment so the rule following the - * reset rule cannot be inserted. - * <p> - * "< a & A @ < e & E < f& F" - This sequence is equivalent to the following - * "< a & A < E & e < f & F". - * <p> - * For a description of the various comparison strength types, see the - * documentation for the <code>Collator</code> class. - * <p> - * As an additional complication to this already overly complex rule scheme, - * if any characters precede the first rule, these characters are considered - * ignorable. They will be treated as if they did not exist during - * comparisons. For example, "- < a < b ..." would make '-' an ignorable - * character such that the strings "high-tech" and "hightech" would - * be considered identical. - * <p> - * A <code>ParseException</code> will be thrown for any of the following - * conditions: - * <ul> - * <li>Unquoted punctuation characters in a text argument.</li> - * <li>A relational or reset operator not followed by a text argument</li> - * <li>A reset operator where the text argument is not present in - * the previous rule string section.</li> - * </ul> - * - * @author Aaron M. Renn (arenn@urbanophile.com) - * @author Tom Tromey (tromey@cygnus.com) - * @author Guilhem Lavaux (guilhem@kaffe.org) - */ -public class RuleBasedCollator extends Collator -{ - /** - * This class describes what rank has a character (or a sequence of characters) - * in the lexicographic order. Each element in a rule has a collation element. - */ - static final class CollationElement - { - String key; - int primary; - short secondary; - short tertiary; - short equality; - boolean ignore; - String expansion; - - CollationElement(String key, int primary, short secondary, short tertiary, - short equality, String expansion, boolean ignore) - { - this.key = key; - this.primary = primary; - this.secondary = secondary; - this.tertiary = tertiary; - this.equality = equality; - this.ignore = ignore; - this.expansion = expansion; - } - - int getValue() - { - return (primary << 16) + (secondary << 8) + tertiary; - } - } - - /** - * Basic collation instruction (internal format) to build the series of - * collation elements. It contains an instruction which specifies the new - * state of the generator. The sequence of instruction should not contain - * RESET (it is used by - * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)}) - * as a temporary state while merging two sets of instructions. - */ - static final class CollationSorter - { - static final int GREATERP = 0; - static final int GREATERS = 1; - static final int GREATERT = 2; - static final int EQUAL = 3; - static final int RESET = 4; - static final int INVERSE_SECONDARY = 5; - - int comparisonType; - String textElement; - int hashText; - int offset; - boolean ignore; - - String expansionOrdering; - } - - /** - * This the the original rule string. - */ - private String rules; - - /** - * This is the table of collation element values - */ - private Object[] ce_table; - - /** - * Quick-prefix finder. - */ - HashMap prefix_tree; - - /** - * This is the value of the last sequence entered into - * <code>ce_table</code>. It is used to compute the - * ordering value of unspecified character. - */ - private int last_primary_value; - - /** - * This is the value of the last secondary sequence of the - * primary 0, entered into - * <code>ce_table</code>. It is used to compute the - * ordering value of an unspecified accented character. - */ - private int last_tertiary_value; - - /** - * This variable is true if accents need to be sorted - * in the other direction. - */ - private boolean inverseAccentComparison; - - /** - * This collation element is special to unknown sequence. - * The JDK uses it to mark and sort the characters which has - * no collation rules. - */ - static final CollationElement SPECIAL_UNKNOWN_SEQ = - new CollationElement("", (short) 32767, (short) 0, (short) 0, - (short) 0, null, false); - - /** - * This method initializes a new instance of <code>RuleBasedCollator</code> - * with the specified collation rules. Note that an application normally - * obtains an instance of <code>RuleBasedCollator</code> by calling the - * <code>getInstance</code> method of <code>Collator</code>. That method - * automatically loads the proper set of rules for the desired locale. - * - * @param rules The collation rule string. - * - * @exception ParseException If the rule string contains syntax errors. - */ - public RuleBasedCollator(String rules) throws ParseException - { - if (rules.equals("")) - throw new ParseException("empty rule set", 0); - - this.rules = rules; - - buildCollationVector(parseString(rules)); - buildPrefixAccess(); - } - - /** - * This method returns the number of common characters at the beginning - * of the string of the two parameters. - * - * @param prefix A string considered as a prefix to test against - * the other string. - * @param s A string to test the prefix against. - * @return The number of common characters. - */ - static int findPrefixLength(String prefix, String s) - { - int index; - int len = prefix.length(); - - for (index = 0; index < len && index < s.length(); ++index) - { - if (prefix.charAt(index) != s.charAt(index)) - return index; - } - - - return index; - } - - /** - * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods - * checks whether it is possible to find an anchor point for the rules to be merged and - * then insert them at that precise point. - * - * @param offset Offset in the string containing rules of the beginning of the rules - * being merged in. - * @param starter Text of the rules being merged. - * @param main Repository of all already parsed rules. - * @param patch Rules to be merged into the repository. - * @throws ParseException if it is impossible to find an anchor point for the new rules. - */ - private void mergeRules(int offset, String starter, ArrayList main, ArrayList patch) - throws ParseException - { - int insertion_point = -1; - int max_length = 0; - - /* We must check that no rules conflict with another already present. If it - * is the case delete the old rule. - */ - - /* For the moment good old O(N^2) algorithm. - */ - for (int i = 0; i < patch.size(); i++) - { - int j = 0; - - while (j < main.size()) - { - CollationSorter rule1 = (CollationSorter) patch.get(i); - CollationSorter rule2 = (CollationSorter) main.get(j); - - if (rule1.textElement.equals(rule2.textElement)) - main.remove(j); - else - j++; - } - } - - // Find the insertion point... O(N) - for (int i = 0; i < main.size(); i++) - { - CollationSorter sorter = (CollationSorter) main.get(i); - int length = findPrefixLength(starter, sorter.textElement); - - if (length > max_length) - { - max_length = length; - insertion_point = i+1; - } - } - - if (insertion_point < 0) - throw new ParseException("no insertion point found for " + starter, offset); - - if (max_length < starter.length()) - { - /* - * We need to expand the first entry. It must be sorted - * like if it was the reference key itself (like the spec - * said. So the first entry is special: the element is - * replaced by the specified text element for the sorting. - * This text replace the old one for comparisons. However - * to preserve the behaviour we replace the first key (corresponding - * to the found prefix) by a new code rightly ordered in the - * sequence. The rest of the subsequence must be appended - * to the end of the sequence. - */ - CollationSorter sorter = (CollationSorter) patch.get(0); - CollationSorter expansionPrefix = - (CollationSorter) main.get(insertion_point-1); - - sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element - - main.add(insertion_point, sorter); - - /* - * This is a new set of rules. Append to the list. - */ - patch.remove(0); - insertion_point++; - } - - // Now insert all elements of patch at the insertion point. - for (int i = 0; i < patch.size(); i++) - main.add(i+insertion_point, patch.get(i)); - } - - /** - * This method parses a string and build a set of sorting instructions. The parsing - * may only be partial on the case the rules are to be merged sometime later. - * - * @param stop_on_reset If this parameter is true then the parser stops when it - * encounters a reset instruction. In the other case, it tries to parse the subrules - * and merged it in the same repository. - * @param v Output vector for the set of instructions. - * @param base_offset Offset in the string to begin parsing. - * @param rules Rules to be parsed. - * @return -1 if the parser reached the end of the string, an integer representing the - * offset in the string at which it stopped parsing. - * @throws ParseException if something turned wrong during the parsing. To get details - * decode the message. - */ - private int subParseString(boolean stop_on_reset, ArrayList v, - int base_offset, String rules) - throws ParseException - { - boolean ignoreChars = (base_offset == 0); - int operator = -1; - StringBuffer sb = new StringBuffer(); - boolean doubleQuote = false; - boolean eatingChars = false; - boolean nextIsModifier = false; - boolean isModifier = false; - int i; - -main_parse_loop: - for (i = 0; i < rules.length(); i++) - { - char c = rules.charAt(i); - int type = -1; - - if (!eatingChars && - ((c >= 0x09 && c <= 0x0D) || (c == 0x20))) - continue; - - isModifier = nextIsModifier; - nextIsModifier = false; - - if (eatingChars && c != '\'') - { - doubleQuote = false; - sb.append(c); - continue; - } - if (doubleQuote && eatingChars) - { - sb.append(c); - doubleQuote = false; - continue; - } - - switch (c) - { - case '!': - throw new ParseException - ("Modifier '!' is not yet supported by Classpath", i + base_offset); - case '<': - type = CollationSorter.GREATERP; - break; - case ';': - type = CollationSorter.GREATERS; - break; - case ',': - type = CollationSorter.GREATERT; - break; - case '=': - type = CollationSorter.EQUAL; - break; - case '\'': - eatingChars = !eatingChars; - doubleQuote = true; - break; - case '@': - if (ignoreChars) - throw new ParseException - ("comparison list has not yet been started. You may only use" - + "(<,;=&)", i + base_offset); - // Inverse the order of secondaries from now on. - nextIsModifier = true; - type = CollationSorter.INVERSE_SECONDARY; - break; - case '&': - type = CollationSorter.RESET; - if (stop_on_reset) - break main_parse_loop; - break; - default: - if (operator < 0) - throw new ParseException - ("operator missing at " + (i + base_offset), i + base_offset); - if (! eatingChars - && ((c >= 0x21 && c <= 0x2F) - || (c >= 0x3A && c <= 0x40) - || (c >= 0x5B && c <= 0x60) - || (c >= 0x7B && c <= 0x7E))) - throw new ParseException - ("unquoted punctuation character '" + c + "'", i + base_offset); - - //type = ignoreChars ? CollationSorter.IGNORE : -1; - sb.append(c); - break; - } - - if (type < 0) - continue; - - if (operator < 0) - { - operator = type; - continue; - } - - if (sb.length() == 0 && !isModifier) - throw new ParseException - ("text element empty at " + (i+base_offset), i+base_offset); - - if (operator == CollationSorter.RESET) - { - /* Reposition in the sorting list at the position - * indicated by the text element. - */ - String subrules = rules.substring(i); - ArrayList sorted_rules = new ArrayList(); - int idx; - - // Parse the subrules but do not iterate through all - // sublist. This is the priviledge of the first call. - idx = subParseString(true, sorted_rules, base_offset+i, subrules); - - // Merge new parsed rules into the list. - mergeRules(base_offset+i, sb.toString(), v, sorted_rules); - sb.setLength(0); - - // Reset state to none. - operator = -1; - type = -1; - // We have found a new subrule at 'idx' but it has not been parsed. - if (idx >= 0) - { - i += idx-1; - continue main_parse_loop; - } - else - // No more rules. - break main_parse_loop; - } - - CollationSorter sorter = new CollationSorter(); - - if (operator == CollationSorter.GREATERP) - ignoreChars = false; - - sorter.comparisonType = operator; - sorter.textElement = sb.toString(); - sorter.hashText = sorter.textElement.hashCode(); - sorter.offset = base_offset+rules.length(); - sorter.ignore = ignoreChars; - sb.setLength(0); - - v.add(sorter); - operator = type; - } - - if (operator >= 0) - { - CollationSorter sorter = new CollationSorter(); - int pos = rules.length() + base_offset; - - if ((sb.length() != 0 && nextIsModifier) - || (sb.length() == 0 && !nextIsModifier && !eatingChars)) - throw new ParseException("text element empty at " + pos, pos); - - if (operator == CollationSorter.GREATERP) - ignoreChars = false; - - sorter.comparisonType = operator; - sorter.textElement = sb.toString(); - sorter.hashText = sorter.textElement.hashCode(); - sorter.offset = base_offset+pos; - sorter.ignore = ignoreChars; - v.add(sorter); - } - - if (i == rules.length()) - return -1; - else - return i; - } - - /** - * This method creates a copy of this object. - * - * @return A copy of this object. - */ - public Object clone() - { - return super.clone(); - } - - /** - * This method completely parses a string 'rules' containing sorting rules. - * - * @param rules String containing the rules to be parsed. - * @return A set of sorting instructions stored in a Vector. - * @throws ParseException if something turned wrong during the parsing. To get details - * decode the message. - */ - private ArrayList parseString(String rules) - throws ParseException - { - ArrayList v = new ArrayList(); - - // result of the first subParseString is not absolute (may be -1 or a - // positive integer). But we do not care. - subParseString(false, v, 0, rules); - - return v; - } - - /** - * This method uses the sorting instructions built by {@link #parseString} - * to build collation elements which can be directly used to sort strings. - * - * @param parsedElements Parsed instructions stored in a ArrayList. - * @throws ParseException if the order of the instructions are not valid. - */ - private void buildCollationVector(ArrayList parsedElements) - throws ParseException - { - int primary_seq = 0; - int last_tertiary_seq = 0; - short secondary_seq = 0; - short tertiary_seq = 0; - short equality_seq = 0; - boolean inverseComparisons = false; - final boolean DECREASING = false; - final boolean INCREASING = true; - boolean secondaryType = INCREASING; - ArrayList v = new ArrayList(); - - // elts is completely sorted. -element_loop: - for (int i = 0; i < parsedElements.size(); i++) - { - CollationSorter elt = (CollationSorter) parsedElements.get(i); - boolean ignoreChar = false; - - switch (elt.comparisonType) - { - case CollationSorter.GREATERP: - primary_seq++; - if (inverseComparisons) - { - secondary_seq = Short.MAX_VALUE; - secondaryType = DECREASING; - } - else - { - secondary_seq = 0; - secondaryType = INCREASING; - } - tertiary_seq = 0; - equality_seq = 0; - inverseComparisons = false; - break; - case CollationSorter.GREATERS: - if (secondaryType == DECREASING) - secondary_seq--; - else - secondary_seq++; - tertiary_seq = 0; - equality_seq = 0; - break; - case CollationSorter.INVERSE_SECONDARY: - inverseComparisons = true; - continue element_loop; - case CollationSorter.GREATERT: - tertiary_seq++; - if (primary_seq == 0) - last_tertiary_seq = tertiary_seq; - equality_seq = 0; - break; - case CollationSorter.EQUAL: - equality_seq++; - break; - case CollationSorter.RESET: - throw new ParseException - ("Invalid reached state 'RESET'. Internal error", elt.offset); - default: - throw new ParseException - ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset); - } - - v.add(new CollationElement(elt.textElement, primary_seq, - secondary_seq, tertiary_seq, - equality_seq, elt.expansionOrdering, elt.ignore)); - } - - this.inverseAccentComparison = inverseComparisons; - - ce_table = v.toArray(); - - last_primary_value = primary_seq+1; - last_tertiary_value = last_tertiary_seq+1; - } - - /** - * Build a tree where all keys are the texts of collation elements and data is - * the collation element itself. The tree is used when extracting all prefix - * for a given text. - */ - private void buildPrefixAccess() - { - prefix_tree = new HashMap(); - - for (int i = 0; i < ce_table.length; i++) - { - CollationElement e = (CollationElement) ce_table[i]; - - prefix_tree.put(e.key, e); - } - } - - /** - * This method returns an integer which indicates whether the first - * specified <code>String</code> is less than, greater than, or equal to - * the second. The value depends not only on the collation rules in - * effect, but also the strength and decomposition settings of this object. - * - * @param source The first <code>String</code> to compare. - * @param target A second <code>String</code> to compare to the first. - * - * @return A negative integer if source < target, a positive integer - * if source > target, or 0 if source == target. - */ - public int compare(String source, String target) - { - CollationElementIterator cs, ct; - CollationElement ord1block = null; - CollationElement ord2block = null; - boolean advance_block_1 = true; - boolean advance_block_2 = true; - - cs = getCollationElementIterator(source); - ct = getCollationElementIterator(target); - - for(;;) - { - int ord1; - int ord2; - - /* - * We have to check whether the characters are ignorable. - * If it is the case then forget them. - */ - if (advance_block_1) - { - ord1block = cs.nextBlock(); - if (ord1block != null && ord1block.ignore) - continue; - } - - if (advance_block_2) - { - ord2block = ct.nextBlock(); - if (ord2block != null && ord2block.ignore) - { - advance_block_1 = false; - continue; - } - } - else - advance_block_2 = true; - - if (!advance_block_1) - advance_block_1 = true; - - if (ord1block != null) - ord1 = ord1block.getValue(); - else - { - if (ord2block == null) - return 0; - return -1; - } - - if (ord2block == null) - return 1; - - ord2 = ord2block.getValue(); - - // We know chars are totally equal, so skip - if (ord1 == ord2) - { - if (getStrength() == IDENTICAL) - if (!ord1block.key.equals(ord2block.key)) - return ord1block.key.compareTo(ord2block.key); - continue; - } - - // Check for primary strength differences - int prim1 = CollationElementIterator.primaryOrder(ord1); - int prim2 = CollationElementIterator.primaryOrder(ord2); - - if (prim1 == 0 && getStrength() < TERTIARY) - { - advance_block_2 = false; - continue; - } - else if (prim2 == 0 && getStrength() < TERTIARY) - { - advance_block_1 = false; - continue; - } - - if (prim1 < prim2) - return -1; - else if (prim1 > prim2) - return 1; - else if (getStrength() == PRIMARY) - continue; - - // Check for secondary strength differences - int sec1 = CollationElementIterator.secondaryOrder(ord1); - int sec2 = CollationElementIterator.secondaryOrder(ord2); - - if (sec1 < sec2) - return -1; - else if (sec1 > sec2) - return 1; - else if (getStrength() == SECONDARY) - continue; - - // Check for tertiary differences - int tert1 = CollationElementIterator.tertiaryOrder(ord1); - int tert2 = CollationElementIterator.tertiaryOrder(ord2); - - if (tert1 < tert2) - return -1; - else if (tert1 > tert2) - return 1; - else if (getStrength() == TERTIARY) - continue; - - // Apparently JDK does this (at least for my test case). - return ord1block.key.compareTo(ord2block.key); - } - } - - /** - * This method tests this object for equality against the specified - * object. This will be true if and only if the specified object is - * another reference to this object. - * - * @param obj The <code>Object</code> to compare against this object. - * - * @return <code>true</code> if the specified object is equal to this object, - * <code>false</code> otherwise. - */ - public boolean equals(Object obj) - { - if (obj == this) - return true; - else - return false; - } - - /** - * This method builds a default collation element without invoking - * the database created from the rules passed to the constructor. - * - * @param c Character which needs a collation element. - * @return A valid brand new CollationElement instance. - */ - CollationElement getDefaultElement(char c) - { - int v; - - // Preliminary support for generic accent sorting inversion (I don't know if all - // characters in the range should be sorted backward). This is the place - // to fix this if needed. - if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361)) - v = 0x0361 - ((int) c - 0x02B9); - else - v = (short) c; - return new CollationElement("" + c, last_primary_value + v, - (short) 0, (short) 0, (short) 0, null, false); - } - - /** - * This method builds a default collation element for an accented character - * without invoking the database created from the rules passed to the constructor. - * - * @param c Character which needs a collation element. - * @return A valid brand new CollationElement instance. - */ - CollationElement getDefaultAccentedElement(char c) - { - int v; - - // Preliminary support for generic accent sorting inversion (I don't know if all - // characters in the range should be sorted backward). This is the place - // to fix this if needed. - if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361)) - v = 0x0361 - ((int) c - 0x02B9); - else - v = (short) c; - return new CollationElement("" + c, (short) 0, - (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false); - } - - /** - * This method returns an instance for <code>CollationElementIterator</code> - * for the specified <code>String</code> under the collation rules for this - * object. - * - * @param source The <code>String</code> to return the - * <code>CollationElementIterator</code> instance for. - * - * @return A <code>CollationElementIterator</code> for the specified - * <code>String</code>. - */ - public CollationElementIterator getCollationElementIterator(String source) - { - return new CollationElementIterator(this, source); - } - - /** - * This method returns an instance of <code>CollationElementIterator</code> - * for the <code>String</code> represented by the specified - * <code>CharacterIterator</code>. - * - * @param source The <code>CharacterIterator</code> with the desired <code>String</code>. - * - * @return A <code>CollationElementIterator</code> for the specified <code>String</code>. - */ - public CollationElementIterator getCollationElementIterator(CharacterIterator source) - { - StringBuffer expand = new StringBuffer(""); - - // Right now we assume that we will read from the beginning of the string. - for (char c = source.first(); - c != CharacterIterator.DONE; - c = source.next()) - decomposeCharacter(c, expand); - - return getCollationElementIterator(expand.toString()); - } - - /** - * This method returns an instance of <code>CollationKey</code> for the - * specified <code>String</code>. The object returned will have a - * more efficient mechanism for its comparison function that could - * provide speed benefits if multiple comparisons are performed, such - * as during a sort. - * - * @param source The <code>String</code> to create a <code>CollationKey</code> for. - * - * @return A <code>CollationKey</code> for the specified <code>String</code>. - */ - public CollationKey getCollationKey(String source) - { - CollationElementIterator cei = getCollationElementIterator(source); - ArrayList vect = new ArrayList(); - - int ord = cei.next(); - cei.reset(); //set to start of string - - while (ord != CollationElementIterator.NULLORDER) - { - // If the primary order is null, it means this is an ignorable - // character. - if (CollationElementIterator.primaryOrder(ord) == 0) - { - ord = cei.next(); - continue; - } - switch (getStrength()) - { - case PRIMARY: - ord = CollationElementIterator.primaryOrder(ord); - break; - - case SECONDARY: - ord = CollationElementIterator.primaryOrder(ord) << 8; - ord |= CollationElementIterator.secondaryOrder(ord); - - default: - break; - } - - vect.add(new Integer(ord)); - ord = cei.next(); //increment to next key - } - - Object[] objarr = vect.toArray(); - byte[] key = new byte[objarr.length * 4]; - - for (int i = 0; i < objarr.length; i++) - { - int j = ((Integer) objarr[i]).intValue(); - key [i * 4] = (byte) ((j & 0xFF000000) >> 24); - key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16); - key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8); - key [i * 4 + 3] = (byte) (j & 0x000000FF); - } - - return new CollationKey(this, source, key); - } - - /** - * This method returns a <code>String</code> containing the collation rules - * for this object. - * - * @return The collation rules for this object. - */ - public String getRules() - { - return rules; - } - - /** - * This method returns a hash value for this object. - * - * @return A hash value for this object. - */ - public int hashCode() - { - return System.identityHashCode(this); - } -} |