diff options
Diffstat (limited to 'libjava/classpath/gnu/java/util/regex/REMatch.java')
-rw-r--r-- | libjava/classpath/gnu/java/util/regex/REMatch.java | 382 |
1 files changed, 210 insertions, 172 deletions
diff --git a/libjava/classpath/gnu/java/util/regex/REMatch.java b/libjava/classpath/gnu/java/util/regex/REMatch.java index d8994829323..d29972e1dd8 100644 --- a/libjava/classpath/gnu/java/util/regex/REMatch.java +++ b/libjava/classpath/gnu/java/util/regex/REMatch.java @@ -37,6 +37,9 @@ exception statement from your version. */ package gnu.java.util.regex; + +import gnu.java.lang.CPStringBuilder; + import java.io.Serializable; /** @@ -47,100 +50,112 @@ import java.io.Serializable; * * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A> */ -public final class REMatch implements Serializable, Cloneable { - private String matchedText; - private CharIndexed matchedCharIndexed; - - // These variables are package scope for fast access within the engine - int eflags; // execution flags this match was made using - - // Offset in source text where match was tried. This is zero-based; - // the actual position in the source text is given by (offset + anchor). - int offset; - - // Anchor position refers to the index into the source input - // at which the matching operation began. - // This is also useful for the ANCHORINDEX option. - int anchor; - - // Package scope; used by RE. - int index; // used while matching to mark current match position in input - // start1[i] is set when the i-th subexp starts. And start1[i] is copied - // to start[i] when the i-th subexp ends. So start[i] keeps the previously - // assigned value while the i-th subexp is being processed. This makes - // backreference to the i-th subexp within the i-th subexp possible. - int[] start; // start positions (relative to offset) for each (sub)exp. - int[] start1; // start positions (relative to offset) for each (sub)exp. - int[] end; // end positions for the same - // start[i] == -1 or end[i] == -1 means that the start/end position is void. - // start[i] == p or end[i] == p where p < 0 and p != -1 means that - // the actual start/end position is (p+1). Start/end positions may - // become negative when the subexpression is in a RETokenLookBehind. - boolean empty; // empty string matched. This flag is used only within - // RETokenRepeated. - - BacktrackStack backtrackStack; - - public Object clone() { - try { - REMatch copy = (REMatch) super.clone(); - - copy.start = (int[]) start.clone(); - copy.start1 = (int[]) start1.clone(); - copy.end = (int[]) end.clone(); - - return copy; - } catch (CloneNotSupportedException e) { - throw new Error(); // doesn't happen - } - } +public final class REMatch implements Serializable, Cloneable +{ + private String matchedText; + private CharIndexed matchedCharIndexed; - void assignFrom(REMatch other) { - start = other.start; - start1 = other.start1; - end = other.end; - index = other.index; - backtrackStack = other.backtrackStack; - } + // These variables are package scope for fast access within the engine + int eflags; // execution flags this match was made using - REMatch(int subs, int anchor, int eflags) { - start = new int[subs+1]; - start1 = new int[subs+1]; - end = new int[subs+1]; - this.anchor = anchor; - this.eflags = eflags; - clear(anchor); - } + // Offset in source text where match was tried. This is zero-based; + // the actual position in the source text is given by (offset + anchor). + int offset; - void finish(CharIndexed text) { - start[0] = 0; - StringBuffer sb = new StringBuffer(); - int i; - for (i = 0; i < end[0]; i++) - sb.append(text.charAt(i)); - matchedText = sb.toString(); - matchedCharIndexed = text; - for (i = 0; i < start.length; i++) { - // If any subexpressions didn't terminate, they don't count - // TODO check if this code ever gets hit - if ((start[i] == -1) ^ (end[i] == -1)) { - start[i] = -1; - end[i] = -1; - } - } - backtrackStack = null; + // Anchor position refers to the index into the source input + // at which the matching operation began. + // This is also useful for the ANCHORINDEX option. + int anchor; + + // Package scope; used by RE. + int index; // used while matching to mark current match position in input + // start1[i] is set when the i-th subexp starts. And start1[i] is copied + // to start[i] when the i-th subexp ends. So start[i] keeps the previously + // assigned value while the i-th subexp is being processed. This makes + // backreference to the i-th subexp within the i-th subexp possible. + int[] start; // start positions (relative to offset) for each (sub)exp. + int[] start1; // start positions (relative to offset) for each (sub)exp. + int[] end; // end positions for the same + // start[i] == -1 or end[i] == -1 means that the start/end position is void. + // start[i] == p or end[i] == p where p < 0 and p != -1 means that + // the actual start/end position is (p+1). Start/end positions may + // become negative when the subexpression is in a RETokenLookBehind. + boolean empty; // empty string matched. This flag is used only within + // RETokenRepeated. + + BacktrackStack backtrackStack; + + public Object clone () + { + try + { + REMatch copy = (REMatch) super.clone (); + + copy.start = (int[]) start.clone (); + copy.start1 = (int[]) start1.clone (); + copy.end = (int[]) end.clone (); + + return copy; } - - /** Clears the current match and moves the offset to the new index. */ - void clear(int index) { - offset = index; - this.index = 0; - for (int i = 0; i < start.length; i++) { - start[i] = start1[i] = end[i] = -1; - } - backtrackStack = null; + catch (CloneNotSupportedException e) + { + throw new Error (); // doesn't happen } - + } + + void assignFrom (REMatch other) + { + start = other.start; + start1 = other.start1; + end = other.end; + index = other.index; + backtrackStack = other.backtrackStack; + } + + REMatch (int subs, int anchor, int eflags) + { + start = new int[subs + 1]; + start1 = new int[subs + 1]; + end = new int[subs + 1]; + this.anchor = anchor; + this.eflags = eflags; + clear (anchor); + } + + void finish (CharIndexed text) + { + start[0] = 0; + CPStringBuilder sb = new CPStringBuilder (); + int i; + for (i = 0; i < end[0]; i++) + sb.append (text.charAt (i)); + matchedText = sb.toString (); + matchedCharIndexed = text; + for (i = 0; i < start.length; i++) + { + // If any subexpressions didn't terminate, they don't count + // TODO check if this code ever gets hit + if ((start[i] == -1) ^ (end[i] == -1)) + { + start[i] = -1; + end[i] = -1; + } + } + backtrackStack = null; + } + + /** Clears the current match and moves the offset to the new index. */ + void clear (int index) + { + offset = index; + this.index = 0; + for (int i = 0; i < start.length; i++) + { + start[i] = start1[i] = end[i] = -1; + } + backtrackStack = null; + } + /** * Returns the string matching the pattern. This makes it convenient * to write code like the following: @@ -150,18 +165,20 @@ public final class REMatch implements Serializable, Cloneable { * if (myMatch != null) System.out.println("Regexp found: "+myMatch); * </code> */ - public String toString() { - return matchedText; - } - + public String toString () + { + return matchedText; + } + /** * Returns the index within the input text where the match in its entirety * began. */ - public int getStartIndex() { - return offset + start[0]; - } - + public int getStartIndex () + { + return offset + start[0]; + } + /** * Returns the index within the input string where the match in * its entirety ends. The return value is the next position after @@ -179,10 +196,11 @@ public final class REMatch implements Serializable, Cloneable { * But you can save yourself that work, since the <code>toString()</code> * method (above) does exactly that for you. */ - public int getEndIndex() { - return offset + end[0]; - } - + public int getEndIndex () + { + return offset + end[0]; + } + /** * Returns the string matching the given subexpression. The subexpressions * are indexed starting with one, not zero. That is, the subexpression @@ -191,25 +209,30 @@ public final class REMatch implements Serializable, Cloneable { * * @param sub Index of the subexpression. */ - public String toString(int sub) { - if ((sub >= start.length) || sub < 0) - throw new IndexOutOfBoundsException("No group " + sub); - if (start[sub] == -1) return null; - if (start[sub] >= 0 && end[sub] <= matchedText.length()) - return (matchedText.substring(start[sub],end[sub])); - else { + public String toString (int sub) + { + if ((sub >= start.length) || sub < 0) + throw new IndexOutOfBoundsException ("No group " + sub); + if (start[sub] == -1) + return null; + if (start[sub] >= 0 && end[sub] <= matchedText.length ()) + return (matchedText.substring (start[sub], end[sub])); + else + { // This case occurs with RETokenLookAhead or RETokenLookBehind. - StringBuffer sb = new StringBuffer(); - int s = start[sub]; - int e = end[sub]; - if (s < 0) s += 1; - if (e < 0) e += 1; - for (int i = start[0] + s; i < start[0] + e; i++) - sb.append(matchedCharIndexed.charAt(i)); - return sb.toString(); - } - } - + CPStringBuilder sb = new CPStringBuilder (); + int s = start[sub]; + int e = end[sub]; + if (s < 0) + s += 1; + if (e < 0) + e += 1; + for (int i = start[0] + s; i < start[0] + e; i++) + sb.append (matchedCharIndexed.charAt (i)); + return sb.toString (); + } + } + /** * Returns the index within the input string used to generate this match * where subexpression number <i>sub</i> begins, or <code>-1</code> if @@ -218,13 +241,14 @@ public final class REMatch implements Serializable, Cloneable { * @param sub Subexpression index * @deprecated Use getStartIndex(int) instead. */ - public int getSubStartIndex(int sub) { - if (sub >= start.length) return -1; - int x = start[sub]; - return (x == -1) ? x : - (x >= 0) ? offset + x : offset + x + 1; - } - + public int getSubStartIndex (int sub) + { + if (sub >= start.length) + return -1; + int x = start[sub]; + return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1; + } + /** * Returns the index within the input string used to generate this match * where subexpression number <i>sub</i> begins, or <code>-1</code> if @@ -233,13 +257,14 @@ public final class REMatch implements Serializable, Cloneable { * @param sub Subexpression index * @since gnu.regexp 1.1.0 */ - public int getStartIndex(int sub) { - if (sub >= start.length) return -1; - int x = start[sub]; - return (x == -1) ? x : - (x >= 0) ? offset + x : offset + x + 1; - } - + public int getStartIndex (int sub) + { + if (sub >= start.length) + return -1; + int x = start[sub]; + return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1; + } + /** * Returns the index within the input string used to generate this match * where subexpression number <i>sub</i> ends, or <code>-1</code> if @@ -248,13 +273,14 @@ public final class REMatch implements Serializable, Cloneable { * @param sub Subexpression index * @deprecated Use getEndIndex(int) instead */ - public int getSubEndIndex(int sub) { - if (sub >= start.length) return -1; - int x = end[sub]; - return (x == -1) ? x : - (x >= 0) ? offset + x : offset + x + 1; - } - + public int getSubEndIndex (int sub) + { + if (sub >= start.length) + return -1; + int x = end[sub]; + return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1; + } + /** * Returns the index within the input string used to generate this match * where subexpression number <i>sub</i> ends, or <code>-1</code> if @@ -262,13 +288,14 @@ public final class REMatch implements Serializable, Cloneable { * * @param sub Subexpression index */ - public int getEndIndex(int sub) { - if (sub >= start.length) return -1; - int x = end[sub]; - return (x == -1) ? x : - (x >= 0) ? offset + x : offset + x + 1; - } - + public int getEndIndex (int sub) + { + if (sub >= start.length) + return -1; + int x = end[sub]; + return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1; + } + /** * Substitute the results of this match to create a new string. * This is patterned after PERL, so the tokens to watch out for are @@ -280,31 +307,42 @@ public final class REMatch implements Serializable, Cloneable { * * @param input A string consisting of literals and <code>$<i>n</i></code> tokens. */ - public String substituteInto(String input) { - // a la Perl, $0 is whole thing, $1 - $9 are subexpressions - StringBuffer output = new StringBuffer(); - int pos; - for (pos = 0; pos < input.length()-1; pos++) { - if ((input.charAt(pos) == '$') && (Character.isDigit(input.charAt(pos+1)))) { - int val = Character.digit(input.charAt(++pos),10); - int pos1 = pos + 1; - while (pos1 < input.length() && - Character.isDigit(input.charAt(pos1))) { - int val1 = val*10 + Character.digit(input.charAt(pos1),10); - if (val1 >= start.length) break; - pos1++; - val = val1; - } - pos = pos1 - 1; - - if (val < start.length) { - output.append(toString(val)); - } - } else output.append(input.charAt(pos)); - } - if (pos < input.length()) output.append(input.charAt(pos)); - return output.toString(); - } + public String substituteInto (String input) + { + // a la Perl, $0 is whole thing, $1 - $9 are subexpressions + CPStringBuilder output = new CPStringBuilder (); + int pos; + for (pos = 0; pos < input.length () - 1; pos++) + { + if ((input.charAt (pos) == '$') + && (Character.isDigit (input.charAt (pos + 1)))) + { + int val = Character.digit (input.charAt (++pos), 10); + int pos1 = pos + 1; + while (pos1 < input.length () && + Character.isDigit (input.charAt (pos1))) + { + int val1 = + val * 10 + Character.digit (input.charAt (pos1), 10); + if (val1 >= start.length) + break; + pos1++; + val = val1; + } + pos = pos1 - 1; + + if (val < start.length) + { + output.append (toString (val)); + } + } + else + output.append (input.charAt (pos)); + } + if (pos < input.length ()) + output.append (input.charAt (pos)); + return output.toString (); + } /* The following are used for debugging purpose public static String d(REMatch m) { |