24 files changed, 1712 insertions, 296 deletions
diff --git a/libjava/classpath/gnu/regexp/CharIndexed.java b/libjava/classpath/gnu/regexp/CharIndexed.java
index a0d7106aefa..df1d8930c6b 100644
--- a/libjava/classpath/gnu/regexp/CharIndexed.java
+++ b/libjava/classpath/gnu/regexp/CharIndexed.java
@@ -1,5 +1,5 @@
 /* gnu/regexp/CharIndexed.java
-   Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -81,4 +81,16 @@ public interface CharIndexed {
      * position at a valid position in the input.
      */
     boolean isValid();
+
+    /**
+     * Returns another CharIndexed containing up to <code>length</code>
+     * characters to the left of the given index. The length is only a
+     * maximum: the returned CharIndexed may contain fewer characters.
+     */
+    CharIndexed lookBehind(int index, int length);
+
+    /** Returns the effective length of this CharIndexed; the array-, String-
+     * and StringBuffer-backed implementations return total length minus anchor.
+     */
+    int length();
 }
diff --git a/libjava/classpath/gnu/regexp/CharIndexedCharArray.java b/libjava/classpath/gnu/regexp/CharIndexedCharArray.java
index 63d858c8709..1388d4729bf 100644
--- a/libjava/classpath/gnu/regexp/CharIndexedCharArray.java
+++ b/libjava/classpath/gnu/regexp/CharIndexedCharArray.java
@@ -1,5 +1,5 @@
 /* gnu/regexp/CharIndexedCharArray.java
-   Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -59,4 +59,13 @@ class CharIndexedCharArray implements CharIndexed, Serializable {
     public boolean move(int index) {
 	return ((anchor += index) < s.length);
     }
+    
+    public CharIndexed lookBehind(int index, int length) { // see CharIndexed.lookBehind
+	if (length > (anchor + index)) length = anchor + index; // clamp so the offset computed below is never negative
+	return new CharIndexedCharArray(s, anchor + index - length); // same array s; second argument is presumably the new anchor
+    }
+
+    public int length() { // see CharIndexed.length
+	return s.length - anchor; // what remains of s from the current anchor onward
+    }
 }
diff --git a/libjava/classpath/gnu/regexp/CharIndexedInputStream.java b/libjava/classpath/gnu/regexp/CharIndexedInputStream.java
index 145fe11b135..d5225a79337 100644
--- a/libjava/classpath/gnu/regexp/CharIndexedInputStream.java
+++ b/libjava/classpath/gnu/regexp/CharIndexedInputStream.java
@@ -1,5 +1,5 @@
 /* gnu/regexp/CharIndexedInputStream.java
-   Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -145,5 +145,15 @@ class CharIndexedInputStream implements CharIndexed {
     public boolean isValid() {
 	return (cached != OUT_OF_BOUNDS);
     }
+
+    public CharIndexed lookBehind(int index, int length) { // not supported for stream input: always throws
+	throw new UnsupportedOperationException(
+	    "difficult to look behind for an input stream");
+    }
+
+    public int length() { // not supported for stream input: always throws
+	throw new UnsupportedOperationException(
+	    "difficult to tell the length for an input stream");
+    }
 }
 
diff --git a/libjava/classpath/gnu/regexp/CharIndexedString.java b/libjava/classpath/gnu/regexp/CharIndexedString.java
index 05be07ac68c..fe4fa8f7637 100644
--- a/libjava/classpath/gnu/regexp/CharIndexedString.java
+++ b/libjava/classpath/gnu/regexp/CharIndexedString.java
@@ -1,5 +1,5 @@
 /* gnu/regexp/CharIndexedString.java
-   Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -61,4 +61,13 @@ class CharIndexedString implements CharIndexed, Serializable {
     public boolean move(int index) {
 	return ((anchor += index) < len);
     }
+
+    public CharIndexed lookBehind(int index, int length) { // see CharIndexed.lookBehind
+	if (length > (anchor + index)) length = anchor + index; // clamp so the offset computed below is never negative
+	return new CharIndexedString(s, anchor + index - length); // same string s; second argument is presumably the new anchor
+    }
+
+    public int length() { // see CharIndexed.length
+	return len - anchor; // len minus the current anchor (len is presumably the length of s)
+    }
 }
diff --git a/libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java b/libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java
index 1b88e398571..9c9118dfee2 100644
--- a/libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java
+++ b/libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java
@@ -1,5 +1,5 @@
 /* gnu/regexp/CharIndexedStringBuffer.java
-   Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -59,4 +59,13 @@ class CharIndexedStringBuffer implements CharIndexed, Serializable {
   public boolean move(int index) {
     return ((anchor += index) < s.length());
   }
+
+  public CharIndexed lookBehind(int index, int length) { // see CharIndexed.lookBehind
+    if (length > (anchor + index)) length = anchor + index; // clamp so the offset computed below is never negative
+    return new CharIndexedStringBuffer(s, anchor + index - length); // same buffer s; second argument is presumably the new anchor
+  }
+
+  public int length() { // see CharIndexed.length
+    return s.length() - anchor; // what remains of s from the current anchor onward
+  }
 }
diff --git a/libjava/classpath/gnu/regexp/RE.java b/libjava/classpath/gnu/regexp/RE.java
index 9ac9b53d1a9..ef606a6d8a7 100644
--- a/libjava/classpath/gnu/regexp/RE.java
+++ b/libjava/classpath/gnu/regexp/RE.java
@@ -1,5 +1,5 @@
 /* gnu/regexp/RE.java
-   Copyright (C) 1998-2001, 2004, 2005 Free Software Foundation, Inc.
+   Copyright (C) 1998-2001, 2004, 2005, 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -136,12 +136,13 @@ public class RE extends REToken {
 
     /** Minimum length, in characters, of any possible match. */
     private int minimumLength;
+    private int maximumLength; // NOTE(review): presumably the maximum length, in characters, of any possible match (counterpart of minimumLength) -- confirm
 
   /**
    * Compilation flag. Do  not  differentiate  case.   Subsequent
    * searches  using  this  RE will be case insensitive.
    */
-  public static final int REG_ICASE = 2;
+  public static final int REG_ICASE = 0x02;
 
   /**
    * Compilation flag. The match-any-character operator (dot)
@@ -149,14 +150,14 @@ public class RE extends REToken {
    * bit RE_DOT_NEWLINE (see RESyntax for details).  This is equivalent to
    * the "/s" operator in Perl.
    */
-  public static final int REG_DOT_NEWLINE = 4;
+  public static final int REG_DOT_NEWLINE = 0x04;
 
   /**
    * Compilation flag. Use multiline mode.  In this mode, the ^ and $
    * anchors will match based on newlines within the input. This is
    * equivalent to the "/m" operator in Perl.
    */
-  public static final int REG_MULTILINE = 8;
+  public static final int REG_MULTILINE = 0x08;
 
   /**
    * Execution flag.
@@ -185,14 +186,14 @@ public class RE extends REToken {
    * //  m4.toString(): "fool"<BR>
    * </CODE>
    */
-  public static final int REG_NOTBOL = 16;
+  public static final int REG_NOTBOL = 0x10;
 
   /**
    * Execution flag.
    * The match-end operator ($) does not match at the end
    * of the input string. Useful for matching on substrings.
    */
-  public static final int REG_NOTEOL = 32;
+  public static final int REG_NOTEOL = 0x20;
 
   /**
    * Execution flag.
@@ -206,7 +207,7 @@ public class RE extends REToken {
    * the example under REG_NOTBOL.  It also affects the use of the \&lt;
    * and \b operators.
    */
-  public static final int REG_ANCHORINDEX = 64;
+  public static final int REG_ANCHORINDEX = 0x40;
 
   /**
    * Execution flag.
@@ -215,7 +216,24 @@ public class RE extends REToken {
    * the corresponding subexpressions.  For example, you may want to
    * replace all matches of "one dollar" with "$1".
    */
-  public static final int REG_NO_INTERPOLATE = 128;
+  public static final int REG_NO_INTERPOLATE = 0x80;
+
+  /**
+   * Execution flag.
+   * Try to match the whole input string. An implicit match-end operator
+   * is added to this regexp.
+   */
+  public static final int REG_TRY_ENTIRE_MATCH = 0x0100;
+
+  /**
+   * Execution flag.
+   * The substitute and substituteAll methods will treat the
+   * character '\' in the replacement as an escape to a literal
+   * character. In this case "\n", "\$", "\\", "\x40" and "\012"
+   * will become "n", "$", "\", "x40" and "012" respectively.
+   * This flag has no effect if REG_NO_INTERPOLATE is set.
+   */
+  public static final int REG_REPLACE_USE_BACKSLASHESCAPE = 0x0200;
 
   /** Returns a string representing the version of the gnu.regexp package. */
   public static final String version() {
@@ -273,12 +291,13 @@ public class RE extends REToken {
   }
 
   // internal constructor used for alternation
-  private RE(REToken first, REToken last,int subs, int subIndex, int minLength) {
+  private RE(REToken first, REToken last,int subs, int subIndex, int minLength, int maxLength) {
     super(subIndex);
     firstToken = first;
     lastToken = last;
     numSubs = subs;
     minimumLength = minLength;
+    maximumLength = maxLength; // each alternation branch keeps the bounds accumulated before the caller resets them to 0
     addToken(new RETokenEndSub(subIndex));
   }
 
@@ -333,6 +352,11 @@ public class RE extends REToken {
     char ch;
     boolean quot = false;
 
+    // Saved syntax and flags.
+    RESyntax savedSyntax = null;
+    int savedCflags = 0;
+    boolean flagsSaved = false;
+
     while (index < pLength) {
       // read the next character unit (including backslash escapes)
       index = getCharUnit(pattern,index,unit,quot);
@@ -359,8 +383,9 @@ public class RE extends REToken {
 	   && !syntax.get(RESyntax.RE_LIMITED_OPS)) {
 	// make everything up to here be a branch. create vector if nec.
 	addToken(currentToken);
-	RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength);
+	RE theBranch = new RE(firstToken, lastToken, numSubs, subIndex, minimumLength, maximumLength);
 	minimumLength = 0;
+	maximumLength = 0;
 	if (branches == null) {
 	    branches = new Vector();
 	}
@@ -402,102 +427,12 @@ public class RE extends REToken {
       //  [...] | [^...]
 
       else if ((unit.ch == '[') && !(unit.bk || quot)) {
-	Vector options = new Vector();
-	boolean negative = false;
-	char lastChar = 0;
-	if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);
-	
-	// Check for initial caret, negation
-	if ((ch = pattern[index]) == '^') {
-	  negative = true;
-	  if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
-	  ch = pattern[index];
-	}
-
-	// Check for leading right bracket literal
-	if (ch == ']') {
-	  lastChar = ch;
-	  if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
-	}
-
-	while ((ch = pattern[index++]) != ']') {
-	  if ((ch == '-') && (lastChar != 0)) {
-	    if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
-	    if ((ch = pattern[index]) == ']') {
-	      options.addElement(new RETokenChar(subIndex,lastChar,insens));
-	      lastChar = '-';
-	    } else {
-	      options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));
-	      lastChar = 0;
-	      index++;
-	    }
-          } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
-            if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
-	    int posixID = -1;
-	    boolean negate = false;
-            char asciiEsc = 0;
-	    if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
-	      switch (pattern[index]) {
-	      case 'D':
-		negate = true;
-	      case 'd':
-		posixID = RETokenPOSIX.DIGIT;
-		break;
-	      case 'S':
-		negate = true;
-	      case 's':
-		posixID = RETokenPOSIX.SPACE;
-		break;
-	      case 'W':
-		negate = true;
-	      case 'w':
-		posixID = RETokenPOSIX.ALNUM;
-		break;
-	      }
-	    }
-            else if ("nrt".indexOf(pattern[index]) != -1) {
-              switch (pattern[index]) {
-                case 'n':
-                  asciiEsc = '\n';
-                  break;
-                case 't':
-                  asciiEsc = '\t';
-                  break;
-                case 'r':
-                  asciiEsc = '\r';
-                  break;
-              }
-            }
-	    if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
-	    
-	    if (posixID != -1) {
-	      options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));
-	    } else if (asciiEsc != 0) {
-	      lastChar = asciiEsc;
-	    } else {
-	      lastChar = pattern[index];
-	    }
-	    ++index;
-	  } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {
-	    StringBuffer posixSet = new StringBuffer();
-	    index = getPosixSet(pattern,index+1,posixSet);
-	    int posixId = RETokenPOSIX.intValue(posixSet.toString());
-	    if (posixId != -1)
-	      options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));
-	  } else {
-	    if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
-	    lastChar = ch;
-	  }
-	  if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
-	} // while in list
-	// Out of list, index is one past ']'
-	    
-	if (lastChar != 0) options.addElement(new RETokenChar(subIndex,lastChar,insens));
-	    
 	// Create a new RETokenOneOf
+	ParseCharClassResult result = parseCharClass(
+		subIndex, pattern, index, pLength, cflags, syntax, 0);
 	addToken(currentToken);
-	options.trimToSize();
-	currentToken = new RETokenOneOf(subIndex,options,negative);
+	currentToken = result.token;
+	index = result.index;
       }
 
       // SUBEXPRESSIONS
@@ -507,7 +442,10 @@ public class RE extends REToken {
 	boolean pure = false;
 	boolean comment = false;
         boolean lookAhead = false;
+        boolean lookBehind = false;
+        boolean independent = false;
         boolean negativelh = false;
+        boolean negativelb = false;
 	if ((index+1 < pLength) && (pattern[index] == '?')) {
 	  switch (pattern[index+1]) {
           case '!':
@@ -525,6 +463,114 @@ public class RE extends REToken {
               index += 2;
             }
             break;
+	  case '<':
+	    // We assume that a syntax supporting look-ahead also supports
+	    // look-behind.  "(?<" must be followed by '!' or '=' to count.
+	    if (syntax.get(RESyntax.RE_LOOKAHEAD) && (index + 2 < pLength)) {
+		// pattern[index] is '?' and pattern[index+1] is '<'; peek past them.
+		switch (pattern[index + 2]) {
+		case '!':
+		  pure = true;
+		  negativelb = true;
+		  lookBehind = true;
+		  index += 3;
+		  break;
+		case '=':
+		  pure = true;
+		  lookBehind = true;
+		  index += 3;
+		}
+	    }
+	    break;
+	  case '>':
+	    // We assume that if the syntax supports look-ahead,
+	    // it also supports independent group.
+            if (syntax.get(RESyntax.RE_LOOKAHEAD)) {
+              pure = true;
+              independent = true;
+              index += 2;
+            }
+            break;
+	  case 'i':
+	  case 'd':
+	  case 'm':
+	  case 's':
+	  // case 'u':  not supported
+	  // case 'x':  not supported
+	  case '-':
+            if (!syntax.get(RESyntax.RE_EMBEDDED_FLAGS)) break;
+	    // Set or reset syntax flags.
+	    int flagIndex = index + 1;
+	    int endFlag = -1;
+	    RESyntax newSyntax = new RESyntax(syntax);
+	    int newCflags = cflags;
+	    boolean negate = false;
+	    while (flagIndex < pLength && endFlag < 0) {
+	        switch(pattern[flagIndex]) {
+	  	case 'i':
+		  if (negate)
+		    newCflags &= ~REG_ICASE;
+		  else
+		    newCflags |= REG_ICASE;
+		  flagIndex++;
+		  break;
+	  	case 'd':
+		  if (negate)
+		    newSyntax.setLineSeparator(RESyntax.DEFAULT_LINE_SEPARATOR);
+		  else
+		    newSyntax.setLineSeparator("\n");
+		  flagIndex++;
+		  break;
+	  	case 'm':
+		  if (negate)
+		    newCflags &= ~REG_MULTILINE;
+		  else
+		    newCflags |= REG_MULTILINE;
+		  flagIndex++;
+		  break;
+	  	case 's':
+		  if (negate)
+		    newCflags &= ~REG_DOT_NEWLINE;
+		  else
+		    newCflags |= REG_DOT_NEWLINE;
+		  flagIndex++;
+		  break;
+	  	// case 'u': not supported
+	  	// case 'x': not supported
+	  	case '-':
+		  negate = true;
+		  flagIndex++;
+		  break;
+		case ':':
+		case ')':
+		  endFlag = pattern[flagIndex];
+		  break;
+		default:
+            	  throw new REException(getLocalizedMessage("repeat.no.token"), REException.REG_BADRPT, index);
+		}
+	    }
+	    if (endFlag == ')') {
+		syntax = newSyntax;
+		cflags = newCflags;
+		insens = ((cflags & REG_ICASE) > 0);
+		// This can be treated as though it were a comment.
+		comment = true;
+		index = flagIndex - 1;
+		break;
+	    }
+	    if (endFlag == ':') {
+		savedSyntax = syntax;
+		savedCflags = cflags;
+		flagsSaved = true;
+		syntax = newSyntax;
+		cflags = newCflags;
+		insens = ((cflags & REG_ICASE) > 0);
+		index = flagIndex -1;
+		// Fall through to the next case.
+	    }
+	    else {
+	        throw new REException(getLocalizedMessage("unmatched.paren"), REException.REG_ESUBREG,index);
+	    }
 	  case ':':
 	    if (syntax.get(RESyntax.RE_PURE_GROUPING)) {
 	      pure = true;
@@ -607,15 +653,28 @@ public class RE extends REToken {
 	    numSubs++;
 	  }
 
-	  int useIndex = (pure || lookAhead) ? 0 : nextSub + numSubs;
+	  int useIndex = (pure || lookAhead || lookBehind || independent) ?
+			 0 : nextSub + numSubs;
 	  currentToken = new RE(String.valueOf(pattern,index,endIndex-index).toCharArray(),cflags,syntax,useIndex,nextSub + numSubs);
 	  numSubs += ((RE) currentToken).getNumSubs();
 
           if (lookAhead) {
 	      currentToken = new RETokenLookAhead(currentToken,negativelh);
 	  }
+          else if (lookBehind) {
+	      currentToken = new RETokenLookBehind(currentToken,negativelb);
+	  }
+          else if (independent) {
+	      currentToken = new RETokenIndependent(currentToken);
+	  }
 
 	  index = nextIndex;
+	  if (flagsSaved) {
+	      syntax = savedSyntax;
+	      cflags = savedCflags;
+	      insens = ((cflags & REG_ICASE) > 0);
+	      flagsSaved = false;
+	  }
 	} // not a comment
       } // subexpression
     
@@ -715,14 +774,45 @@ public class RE extends REToken {
 	else
 	  currentToken = setRepeated(currentToken,0,1,index);
       }
+
+      // OCTAL CHARACTER
+      //  \0377
 	
+      else if (unit.bk && (unit.ch == '0') && syntax.get(RESyntax.RE_OCTAL_CHAR)) {
+	CharExpression ce = getCharExpression(pattern, index - 2, pLength, syntax);
+	if (ce == null)
+	  throw new REException("invalid octal character", REException.REG_ESCAPE, index);
+	index = index - 2 + ce.len;
+	addToken(currentToken);
+	currentToken = new RETokenChar(subIndex,ce.ch,insens);
+      }
+
       // BACKREFERENCE OPERATOR
-      //  \1 \2 ... \9
+      //  \1 \2 ... \9 and \10 \11 \12 ...
       // not available if RE_NO_BK_REFS is set
+      // Perl recognizes \10, \11, and so on only if enough parentheses
+      // have been opened before them; otherwise they are treated as
+      // aliases of \010, \011, ... (octal characters).  In Sun's JDK,
+      // an octal character expression must always begin with \0.
+      // We do as the JDK does. But FIXME: take a look at "(a)(b)\29".
+      // The JDK treats \2 as a back reference to the 2nd group because
+      // there are only two groups, whereas this implementation always
+      // treats \29 as a back reference to the 29th group.
 
       else if (unit.bk && Character.isDigit(unit.ch) && !syntax.get(RESyntax.RE_NO_BK_REFS)) {
 	addToken(currentToken);
-	currentToken = new RETokenBackRef(subIndex,Character.digit(unit.ch,10),insens);
+	int numBegin = index - 1;
+	int numEnd = pLength;
+	for (int i = index; i < pLength; i++) {
+	    if (! Character.isDigit(pattern[i])) {
+		numEnd = i;
+		break;
+	    }
+	}
+	int num = parseInt(pattern, numBegin, numEnd-numBegin, 10);
+
+	currentToken = new RETokenBackRef(subIndex,num,insens);
+	index = numEnd;
       }
 
       // START OF STRING OPERATOR
@@ -844,6 +934,32 @@ public class RE extends REToken {
 	  currentToken = new RETokenEnd(subIndex,null);
 	}
 
+        // HEX CHARACTER, UNICODE CHARACTER
+        //  \x1B, \u1234
+	
+	else if ((unit.bk && (unit.ch == 'x') && syntax.get(RESyntax.RE_HEX_CHAR)) ||
+		 (unit.bk && (unit.ch == 'u') && syntax.get(RESyntax.RE_UNICODE_CHAR))) {
+	  CharExpression ce = getCharExpression(pattern, index - 2, pLength, syntax);
+	  if (ce == null)
+	    throw new REException("invalid hex character", REException.REG_ESCAPE, index);
+	  index = index - 2 + ce.len;
+	  addToken(currentToken);
+	  currentToken = new RETokenChar(subIndex,ce.ch,insens);
+	}
+
+	// NAMED PROPERTY
+	// \p{prop}, \P{prop}
+
+	else if ((unit.bk && (unit.ch == 'p') && syntax.get(RESyntax.RE_NAMED_PROPERTY)) ||
+	         (unit.bk && (unit.ch == 'P') && syntax.get(RESyntax.RE_NAMED_PROPERTY))) {
+	  NamedProperty np = getNamedProperty(pattern, index - 2, pLength);
+	  if (np == null)
+	      throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
+	  index = index - 2 + np.len;
+	  addToken(currentToken);
+	  currentToken = getRETokenNamedProperty(subIndex,np,insens,index);
+	}
+
 	// NON-SPECIAL CHARACTER (or escape to make literal)
         //  c | \* for example
 
@@ -857,9 +973,10 @@ public class RE extends REToken {
     addToken(currentToken);
       
     if (branches != null) {
-	branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength));
+	branches.addElement(new RE(firstToken,lastToken,numSubs,subIndex,minimumLength, maximumLength));
 	branches.trimToSize(); // compact the Vector
 	minimumLength = 0;
+	maximumLength = 0;
 	firstToken = lastToken = null;
 	addToken(new RETokenOneOf(subIndex,branches,false));
     } 
@@ -867,6 +984,199 @@ public class RE extends REToken {
 
   }
 
+  private static class ParseCharClassResult { // value returned by parseCharClass
+      RETokenOneOf token; // the token built for the parsed character list
+      int index; // where the caller should resume: one past ']' or, when stopped at "&&", the first '&'
+      boolean returnAtAndOperator = false; // set to true only when parsing stopped early at "&&"
+  }
+
+  /** Parse [...] or [^...] and make an RETokenOneOf instance.
+   * @param subIndex subIndex to be given to the created RETokenOneOf instance.
+   * @param pattern Input array of characters to be parsed.
+   * @param index Index pointing to the character next to the beginning '['.
+   * @param pLength Limit of the input array.
+   * @param cflags Compilation flags used to parse the pattern.
+   * @param syntax Syntax used to parse the pattern.
+   * @param pflags Flags that affect the behavior of this method.
+   * @return the token, and the index one past ']' (or of "&&" if stopped there).
+   */
+  private static ParseCharClassResult parseCharClass(int subIndex,
+		char[] pattern, int index,
+		int pLength, int cflags, RESyntax syntax, int pflags)
+		throws REException {
+
+	boolean insens = ((cflags & REG_ICASE) > 0);
+	Vector options = new Vector(); // tokens listed directly in this character list
+	Vector addition = new Vector(); // nested lists and "&&" operands, interleaved with "|" / "&" markers
+	boolean additionAndAppeared = false;
+	final int RETURN_AT_AND = 0x01; // pflags bit: return to the caller when "&&" is met
+	boolean returnAtAndOperator = ((pflags & RETURN_AT_AND) != 0);
+	boolean negative = false;
+	char ch;
+
+	char lastChar = 0;
+	boolean lastCharIsSet = false; // true while lastChar holds a pending literal (replaces the former lastChar != 0 test)
+	if (index == pLength) throw new REException(getLocalizedMessage("unmatched.bracket"),REException.REG_EBRACK,index);
+	
+	// Check for initial caret, negation
+	if ((ch = pattern[index]) == '^') {
+	  negative = true;
+	  if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
+	  ch = pattern[index];
+	}
+
+	// Check for leading right bracket literal
+	if (ch == ']') {
+	  lastChar = ch; lastCharIsSet = true;
+	  if (++index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
+	}
+
+	while ((ch = pattern[index++]) != ']') {
+	  if ((ch == '-') && (lastCharIsSet)) {
+	    if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
+	    if ((ch = pattern[index]) == ']') {
+	      options.addElement(new RETokenChar(subIndex,lastChar,insens));
+	      lastChar = '-';
+	    } else {
+	      if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
+	        CharExpression ce = getCharExpression(pattern, index, pLength, syntax);
+	        if (ce == null)
+		  throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
+		ch = ce.ch;
+		index = index + ce.len - 1; // the index++ below then steps past the whole escape
+	      }
+	      options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));
+	      lastChar = 0; lastCharIsSet = false;
+	      index++;
+	    }
+          } else if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
+            if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
+	    int posixID = -1;
+	    boolean negate = false;
+            char asciiEsc = 0;
+	    boolean asciiEscIsSet = false;
+	    NamedProperty np = null;
+	    if (("dswDSW".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) {
+	      switch (pattern[index]) {
+	      case 'D':
+		negate = true;
+	      case 'd':
+		posixID = RETokenPOSIX.DIGIT;
+		break;
+	      case 'S':
+		negate = true;
+	      case 's':
+		posixID = RETokenPOSIX.SPACE;
+		break;
+	      case 'W':
+		negate = true;
+	      case 'w':
+		posixID = RETokenPOSIX.ALNUM;
+		break;
+	      }
+	    }
+	    if (("pP".indexOf(pattern[index]) != -1) && syntax.get(RESyntax.RE_NAMED_PROPERTY)) {
+	      np = getNamedProperty(pattern, index - 1, pLength);
+	      if (np == null)
+		throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
+	      index = index - 1 + np.len - 1; // presumably np.len counts from the backslash; ++index below steps past the escape
+	    }
+	    else {
+	      CharExpression ce = getCharExpression(pattern, index - 1, pLength, syntax);
+	      if (ce == null)
+		throw new REException("invalid escape sequence", REException.REG_ESCAPE, index);
+	      asciiEsc = ce.ch; asciiEscIsSet = true;
+	      index = index - 1 + ce.len - 1;
+	    }
+	    if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));
+	    
+	    if (posixID != -1) {
+	      options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));
+	    } else if (np != null) {
+	      options.addElement(getRETokenNamedProperty(subIndex,np,insens,index));
+	    } else if (asciiEscIsSet) {
+	      lastChar = asciiEsc; lastCharIsSet = true;
+	    } else {
+	      lastChar = pattern[index]; lastCharIsSet = true;
+	    }
+	    ++index;
+	  } else if ((ch == '[') && (syntax.get(RESyntax.RE_CHAR_CLASSES)) && (index < pLength) && (pattern[index] == ':')) {
+	    StringBuffer posixSet = new StringBuffer();
+	    index = getPosixSet(pattern,index+1,posixSet);
+	    int posixId = RETokenPOSIX.intValue(posixSet.toString());
+	    if (posixId != -1)
+	      options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));
+	  } else if ((ch == '[') && (syntax.get(RESyntax.RE_NESTED_CHARCLASS))) {
+		ParseCharClassResult result = parseCharClass(
+		    subIndex, pattern, index, pLength, cflags, syntax, 0);
+		addition.addElement(result.token);
+		addition.addElement("|");
+		index = result.index;
+	  } else if ((ch == '&') &&
+		     (syntax.get(RESyntax.RE_NESTED_CHARCLASS)) &&
+		     (index < pLength) && (pattern[index] == '&')) {
+		if (returnAtAndOperator) {
+		    ParseCharClassResult result = new ParseCharClassResult(); 
+		    options.trimToSize();
+		    if (additionAndAppeared) addition.addElement("&");
+		    if (addition.size() == 0) addition = null;
+		    result.token = new RETokenOneOf(subIndex,
+			options, addition, negative);
+		    result.index = index - 1; // hand back the position of the first '&' so the caller sees "&&" again
+		    result.returnAtAndOperator = true;
+		    return result;
+		}
+		// The precedence of the operator "&&" is the lowest.
+		// So we postpone adding "&" until other elements
+		// are added. And we insert Boolean.FALSE at the
+		// beginning of the list of tokens following "&&".
+		// So, "&&[a-b][k-m]" will be stored in the Vector
+		// addition in this order:
+		//     Boolean.FALSE, [a-b], "|", [k-m], "|", "&"
+		if (additionAndAppeared) addition.addElement("&");
+		addition.addElement(Boolean.FALSE);
+		additionAndAppeared = true;
+
+		// The operand of "&&" is either (1) explicitly enclosed
+		// by [], or (2) not enclosed and terminated by the next
+		// "&&" or by the end of the character list.  Case (1) is
+		// left to the preceding else-if block; case (2) is parsed
+		// here.  NOTE(review): in case (1) index still points at
+	        // the second '&', which the next iteration appears to
+		// record as a literal '&' -- confirm this is intended.
+		if ((index + 1 < pLength) && (pattern[index + 1] != '[')) {
+		    ParseCharClassResult result = parseCharClass(
+			subIndex, pattern, index+1, pLength, cflags, syntax,
+			RETURN_AT_AND);
+		    addition.addElement(result.token);
+		    addition.addElement("|");
+		    // If the method returned at the next "&&", it is OK.
+		    // Otherwise we have eaten the mark of the end of this
+		    // character list "]".  In this case we must give back
+		    // the end mark.
+		    index = (result.returnAtAndOperator ?
+			result.index: result.index - 1);
+		}
+	  } else {
+	    if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));
+	    lastChar = ch; lastCharIsSet = true;
+	  }
+	  if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
+	} // while in list
+	// Out of list, index is one past ']'
+	    
+	if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));
+	   
+	ParseCharClassResult result = new ParseCharClassResult(); 
+	// Create a new RETokenOneOf
+	options.trimToSize();
+	if (additionAndAppeared) addition.addElement("&");
+	if (addition.size() == 0) addition = null;
+	result.token = new RETokenOneOf(subIndex,options, addition, negative);
+	result.index = index;
+	return result;
+  }
+
   private static int getCharUnit(char[] input, int index, CharUnit unit, boolean quot) throws REException {
     unit.ch = input[index++];
     unit.bk = (unit.ch == '\\'
@@ -878,6 +1188,176 @@ public class RE extends REToken {
     return index;
   }
 
+  private static int parseInt(char[] input, int pos, int len, int radix) {
+    int ret = 0;
+    for (int i = pos; i < pos + len; i++) {
+	ret = ret * radix + Character.digit(input[i], radix);
+    }
+    return ret;
+  }
+
+  /**
+   * This class represents various expressions for a character.
+   * "a"      : 'a' itself.
+   * "\0123"  : Octal char 0123
+   * "\x1b"   : Hex char 0x1b
+   * "\u1234" : Unicode char \u1234
+   */
+  private static class CharExpression {
+    /** character represented by this expression */
+    char ch;
+    /** String expression */
+    String expr;
+    /** length of this expression */
+    int len;
+    public String toString() { return expr; }
+  }
+
+  private static CharExpression getCharExpression(char[] input, int pos, int lim,
+        RESyntax syntax) {
+    CharExpression ce = new CharExpression();
+    char c = input[pos];
+    if (c == '\\') {
+      if (pos + 1 >= lim) return null;
+      c = input[pos + 1];
+      switch(c) {
+      case 't':
+        ce.ch = '\t';
+        ce.len = 2;
+        break;
+      case 'n':
+        ce.ch = '\n';
+        ce.len = 2;
+        break;
+      case 'r':
+        ce.ch = '\r';
+        ce.len = 2;
+        break;
+      case 'x':
+      case 'u':
+        if ((c == 'x' && syntax.get(RESyntax.RE_HEX_CHAR)) ||
+            (c == 'u' && syntax.get(RESyntax.RE_UNICODE_CHAR))) {
+          int l = 0;
+          int expectedLength = (c == 'x' ? 2 : 4);
+          for (int i = pos + 2; i < pos + 2 + expectedLength; i++) {
+            if (i >= lim) break;
+            if (!((input[i] >= '0' && input[i] <= '9') ||
+                  (input[i] >= 'A' && input[i] <= 'F') ||
+                  (input[i] >= 'a' && input[i] <= 'f')))
+                break;
+	    l++;
+          }
+          if (l != expectedLength) return null;
+          ce.ch = (char)(parseInt(input, pos + 2, l, 16));
+	  ce.len = l + 2;
+        }
+        else {
+          ce.ch = c;
+          ce.len = 2;
+        }
+        break;
+      case '0':
+        if (syntax.get(RESyntax.RE_OCTAL_CHAR)) {
+          int l = 0;
+          for (int i = pos + 2; i < pos + 2 + 3; i++) {
+            if (i >= lim) break;
+	    if (input[i] < '0' || input[i] > '7') break;
+            l++;
+          }
+          if (l == 3 && input[pos + 2] > '3') l--;
+          if (l <= 0) return null;
+          ce.ch = (char)(parseInt(input, pos + 2, l, 8));
+          ce.len = l + 2;
+        }
+        else {
+          ce.ch = c;
+          ce.len = 2;
+        }
+        break;
+      default:
+        ce.ch = c;
+        ce.len = 2;
+        break;
+      }
+    }
+    else {
+      ce.ch = input[pos];
+      ce.len = 1;
+    }
+    ce.expr = new String(input, pos, ce.len);
+    return ce;
+  }
+
+  /**
+   * This class represents a substring in a pattern string expressing
+   * a named property.
+   * "\pA"      : Property named "A"
+   * "\p{prop}" : Property named "prop"
+   * "\PA"      : Property named "A" (Negated)
+   * "\P{prop}" : Property named "prop" (Negated)
+   */
+  private static class NamedProperty {
+    /** Property name */
+    String name;
+    /** Negated or not */
+    boolean negate;
+    /** length of this expression */
+    int len;
+  }
+
+  private static NamedProperty getNamedProperty(char[] input, int pos, int lim) {
+    NamedProperty np = new NamedProperty();
+    char c = input[pos];
+    if (c == '\\') {
+      if (++pos + 1 >= lim) return null; // need 'p'/'P' AND one more char before lim
+      c = input[pos++];
+      switch(c) {
+      case 'p':
+        np.negate = false;
+        break;
+      case 'P':
+        np.negate = true;
+        break;
+      default:
+	return null;
+      }
+      c = input[pos++]; // in bounds: guaranteed by the check above
+      if (c == '{') {
+          int p = -1; // index of the closing '}', or -1 if none before lim
+	  for (int i = pos; i < lim; i++) {
+	      if (input[i] == '}') {
+		  p = i;
+		  break;
+	      }
+	  }
+	  if (p < 0) return null; // unterminated "\p{..."
+	  int len = p - pos;
+          np.name = new String(input, pos, len);
+	  np.len = len + 4; // backslash, p/P, '{', name, '}'
+      }
+      else {
+          np.name = new String(input, pos - 1, 1);
+	  np.len = 3; // backslash, p/P, one-character name
+      }
+      return np;
+    }
+    else return null;
+  }
+
+  private static RETokenNamedProperty getRETokenNamedProperty(
+      int subIndex, NamedProperty np, boolean insens, int index)
+      throws REException {
+    try {
+	return new RETokenNamedProperty(subIndex, np.name, insens, np.negate);
+    }
+    catch (REException e) {
+	REException ree;
+	ree = new REException(e.getMessage(), REException.REG_ESCAPE, index);
+	ree.initCause(e);
+	throw ree;
+    }
+  }
+
   /**
    * Checks if the regular expression matches the input in its entirety.
    *
@@ -958,6 +1438,10 @@ public class RE extends REToken {
       return minimumLength;
   }
 
+  public int getMaximumLength() {
+      return maximumLength;
+  }
+
   /**
    * Returns an array of all matches found in the input.
    *
@@ -1025,7 +1509,9 @@ public class RE extends REToken {
   
     /* Implements abstract method REToken.match() */
     boolean match(CharIndexed input, REMatch mymatch) { 
-	if (firstToken == null) return next(input, mymatch);
+	if (firstToken == null) {
+	    return next(input, mymatch);
+	}
 
 	// Note the start of this subexpression
 	mymatch.start[subIndex] = mymatch.index;
@@ -1089,23 +1575,34 @@ public class RE extends REToken {
   }
 
   REMatch getMatchImpl(CharIndexed input, int anchor, int eflags, StringBuffer buffer) {
+      boolean tryEntireMatch = ((eflags & REG_TRY_ENTIRE_MATCH) != 0);
+      RE re = (tryEntireMatch ? (RE) this.clone() : this);
+      if (tryEntireMatch) {
+	  re.chain(new RETokenEnd(0, null));
+      }
       // Create a new REMatch to hold results
       REMatch mymatch = new REMatch(numSubs, anchor, eflags);
       do {
 	  // Optimization: check if anchor + minimumLength > length
 	  if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) {
-	      if (match(input, mymatch)) {
-		  // Find longest match of them all to observe leftmost longest
-		  REMatch longest = mymatch;
+	      if (re.match(input, mymatch)) {
+		  REMatch best = mymatch;
+		  // We assume that the match that comes first is the best.
+		  // And the following "The longer, the better" rule has
+		  // been commented out. The longest is not necessarily
+		  // the best. For example, "a" out of "aaa" is the best
+		  // match for /a+?/.
+		  /*
+		  // Find best match of them all to observe leftmost longest
 		  while ((mymatch = mymatch.next) != null) {
-		      if (mymatch.index > longest.index) {
-			  longest = mymatch;
+		      if (mymatch.index > best.index) {
+		   	best = mymatch;
 		      }
 		  }
-		  
-		  longest.end[0] = longest.index;
-		  longest.finish(input);
-		  return longest;
+		  */
+		  best.end[0] = best.index;
+		  best.finish(input);
+		  return best;
 	      }
 	  }
 	  mymatch.clear(++anchor);
@@ -1216,8 +1713,7 @@ public class RE extends REToken {
     StringBuffer buffer = new StringBuffer();
     REMatch m = getMatchImpl(input,index,eflags,buffer);
     if (m==null) return buffer.toString();
-    buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
-		   replace : m.substituteInto(replace) );
+    buffer.append(getReplacement(replace, m, eflags));
     if (input.move(m.end[0])) {
       do {
 	buffer.append(input.charAt(0));
@@ -1278,8 +1774,7 @@ public class RE extends REToken {
     StringBuffer buffer = new StringBuffer();
     REMatch m;
     while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
-	buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
-		       replace : m.substituteInto(replace) );
+      buffer.append(getReplacement(replace, m, eflags));
       index = m.getEndIndex();
       if (m.end[0] == 0) {
 	char ch = input.charAt(0);
@@ -1294,11 +1789,50 @@ public class RE extends REToken {
     }
     return buffer.toString();
   }
+
+  public static String getReplacement(String replace, REMatch m, int eflags) {
+    if ((eflags & REG_NO_INTERPOLATE) > 0)
+      return replace;
+    else {
+      if ((eflags & REG_REPLACE_USE_BACKSLASHESCAPE) > 0) {
+        StringBuffer sb = new StringBuffer();
+        int l = replace.length();
+        for (int i = 0; i < l; i++) {
+	    char c = replace.charAt(i);
+            switch(c) {
+            case '\\':
+              i++;
+              // Let StringIndexOutOfBoundsException be thrown.
+              sb.append(replace.charAt(i));
+              break;
+            case '$':
+	      int i1 = i + 1;
+	      while (i1 < replace.length() &&
+		Character.isDigit(replace.charAt(i1))) i1++;
+              sb.append(m.substituteInto(replace.substring(i, i1)));
+              i = i1 - 1;
+              break;
+            default:
+              sb.append(c);
+            }
+        }
+        return sb.toString();
+      }
+      else
+        return m.substituteInto(replace);
+    }
+  }	
   
   /* Helper function for constructor */
   private void addToken(REToken next) {
     if (next == null) return;
     minimumLength += next.getMinimumLength();
+    int nmax = next.getMaximumLength();
+    if (nmax < Integer.MAX_VALUE && maximumLength < Integer.MAX_VALUE)
+	maximumLength += nmax;
+    else 
+	maximumLength = Integer.MAX_VALUE;
+
     if (firstToken == null) {
 	lastToken = firstToken = next;
     } else {
diff --git a/libjava/classpath/gnu/regexp/REMatch.java b/libjava/classpath/gnu/regexp/REMatch.java
index cf25bb331c5..91a3c0249c0 100644
--- a/libjava/classpath/gnu/regexp/REMatch.java
+++ b/libjava/classpath/gnu/regexp/REMatch.java
@@ -1,5 +1,5 @@
 /* gnu/regexp/REMatch.java
-   Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -67,6 +67,10 @@ public final class REMatch implements Serializable, Cloneable {
     int[] start; // start positions (relative to offset) for each (sub)exp.
     int[] end;   // end positions for the same
     REMatch next; // other possibility (to avoid having to use arrays)
+    boolean empty; // empty string matched. This flag is used only within
+		   // RETokenRepeated.
+    int matchFlags; // flags passed to match methods
+    static final int MF_FIND_ALL = 0x01;
 
     public Object clone() {
 	try {
@@ -177,7 +181,9 @@ public final class REMatch implements Serializable, Cloneable {
      * @param sub Index of the subexpression.
      */
     public String toString(int sub) {
-	if ((sub >= start.length) || (start[sub] == -1)) return "";
+	if ((sub >= start.length) || sub < 0)
+	    throw new IndexOutOfBoundsException("No group " + sub);
+	if (start[sub] == -1) return null;
 	return (matchedText.substring(start[sub],end[sub]));
     }
     
@@ -242,6 +248,8 @@ public final class REMatch implements Serializable, Cloneable {
      * <code>$0</code> through <code>$9</code>.  <code>$0</code> matches
      * the full substring matched; <code>$<i>n</i></code> matches
      * subexpression number <i>n</i>.
+     * <code>$10, $11, ...</code> may match the 10th, 11th, ... subexpressions
+     * if such subexpressions exist.
      *
      * @param input A string consisting of literals and <code>$<i>n</i></code> tokens.
      */
@@ -252,6 +260,16 @@ public final class REMatch implements Serializable, Cloneable {
 	for (pos = 0; pos < input.length()-1; pos++) {
 	    if ((input.charAt(pos) == '$') && (Character.isDigit(input.charAt(pos+1)))) {
 		int val = Character.digit(input.charAt(++pos),10);
+		int pos1 = pos + 1;
+		while (pos1 < input.length() &&
+		       Character.isDigit(input.charAt(pos1))) {
+		    int val1 = val*10 + Character.digit(input.charAt(pos1),10);
+		    if (val1 >= start.length) break;
+		    pos1++;
+		    val = val1;
+		}
+		pos = pos1 - 1;
+
 		if (val < start.length) {
 		    output.append(toString(val));
 		} 
@@ -260,4 +278,42 @@ public final class REMatch implements Serializable, Cloneable {
 	if (pos < input.length()) output.append(input.charAt(pos));
 	return output.toString();
     }
+
+    static class REMatchList {
+        REMatch head;
+	REMatch tail;
+        REMatchList() {
+	    head = tail = null;
+	}
+	/* Not used now. But we may need this some day?
+	void addHead(REMatch newone) {
+            if (head == null) {
+                head = newone;
+                tail = newone;
+                while (tail.next != null) {
+                    tail = tail.next;
+                }
+            }
+	    else {
+                REMatch tmp = newone;
+                while (tmp.next != null) tmp = tmp.next;
+                tmp.next = head;
+	        head = newone;
+	    }
+	}
+	*/
+	void addTail(REMatch newone) {
+            if (head == null) {
+                head = newone;
+                tail = newone;
+            }
+            else {
+                tail.next = newone;
+            }
+            while (tail.next != null) {
+                tail = tail.next;
+            }
+	}
+    }
+
 }
diff --git a/libjava/classpath/gnu/regexp/RESyntax.java b/libjava/classpath/gnu/regexp/RESyntax.java
index 7272b03481b..81fd999bfcf 100644
--- a/libjava/classpath/gnu/regexp/RESyntax.java
+++ b/libjava/classpath/gnu/regexp/RESyntax.java
@@ -1,5 +1,5 @@
 /* gnu/regexp/RESyntax.java
-   Copyright (C) 1998-2002, 2004 Free Software Foundation, Inc.
+   Copyright (C) 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -202,7 +202,37 @@ public final class RESyntax implements Serializable {
    */
   public static final int RE_POSSESSIVE_OPS            = 25;
 
-  private static final int BIT_TOTAL                   = 26;
+  /**
+   * Syntax bit.  Allow embedded flags, (?is-x), as in Perl5.
+   */
+  public static final int RE_EMBEDDED_FLAGS            = 26;
+
+  /**
+   * Syntax bit.  Allow octal char (\0377), as in Perl5.
+   */
+  public static final int RE_OCTAL_CHAR                = 27;
+
+  /**
+   * Syntax bit.  Allow hex char (\x1b), as in Perl5.
+   */
+  public static final int RE_HEX_CHAR                  = 28;
+
+  /**
+   * Syntax bit.  Allow Unicode char (\u1234), as in Java 1.4.
+   */
+  public static final int RE_UNICODE_CHAR              = 29;
+
+  /**
+   * Syntax bit.  Allow named property (\p{P}, \P{p}), as in Perl5.
+   */
+  public static final int RE_NAMED_PROPERTY            = 30;
+
+  /**
+   * Syntax bit.  Allow nested character class ([a-z&&[^p-r]]), as in Java 1.4.
+   */
+  public static final int RE_NESTED_CHARCLASS          = 31;
+
+  private static final int BIT_TOTAL                   = 32;
 
   /**
    * Predefined syntax.
@@ -422,6 +452,10 @@ public final class RESyntax implements Serializable {
 	  .set(RE_STRING_ANCHORS)         // \A,\Z
 	  .set(RE_CHAR_CLASS_ESC_IN_LISTS)// \d,\D,\w,\W,\s,\S within []
 	  .set(RE_COMMENTS)              // (?#)
+	  .set(RE_EMBEDDED_FLAGS)         // (?imsx-imsx)
+	  .set(RE_OCTAL_CHAR)             // \0377
+	  .set(RE_HEX_CHAR)               // \x1b
+	  .set(RE_NAMED_PROPERTY)         // \p{prop}, \P{prop}
 	  .makeFinal();
       
       RE_SYNTAX_PERL5_S = new RESyntax(RE_SYNTAX_PERL5)
@@ -431,6 +465,8 @@ public final class RESyntax implements Serializable {
       RE_SYNTAX_JAVA_1_4 = new RESyntax(RE_SYNTAX_PERL5)
 	  // XXX
 	  .set(RE_POSSESSIVE_OPS)         // *+,?+,++,{}+
+	  .set(RE_UNICODE_CHAR)           // \u1234
+	  .set(RE_NESTED_CHARCLASS)       // [a-z&&[^p-r]]
 	  .makeFinal();
   }
 
diff --git a/libjava/classpath/gnu/regexp/REToken.java b/libjava/classpath/gnu/regexp/REToken.java
index 4eae9ec473c..5f4659b21ac 100644
--- a/libjava/classpath/gnu/regexp/REToken.java
+++ b/libjava/classpath/gnu/regexp/REToken.java
@@ -38,12 +38,21 @@ exception statement from your version. */
 package gnu.regexp;
 import java.io.Serializable;
 
-abstract class REToken implements Serializable {
+abstract class REToken implements Serializable, Cloneable {
 
   protected REToken next = null;
   protected REToken uncle = null;
   protected int subIndex;
 
+  public Object clone() {
+    try {
+      REToken copy = (REToken) super.clone();
+      return copy;
+    } catch (CloneNotSupportedException e) {
+      throw new Error(); // doesn't happen
+    }
+  }
+
   protected REToken(int subIndex) {
       this.subIndex = subIndex;
   }
@@ -52,6 +61,10 @@ abstract class REToken implements Serializable {
     return 0;
   }
 
+  int getMaximumLength() {
+    return Integer.MAX_VALUE;
+  }
+
   void setUncle(REToken anUncle) {
     uncle = anUncle;
   }
diff --git a/libjava/classpath/gnu/regexp/RETokenAny.java b/libjava/classpath/gnu/regexp/RETokenAny.java
index ac032dcb3bf..2b0967a79a1 100644
--- a/libjava/classpath/gnu/regexp/RETokenAny.java
+++ b/libjava/classpath/gnu/regexp/RETokenAny.java
@@ -55,6 +55,10 @@ final class RETokenAny extends REToken {
     return 1;
   }
 
+  int getMaximumLength() {
+    return 1;
+  }
+
     boolean match(CharIndexed input, REMatch mymatch) {
     char ch = input.charAt(mymatch.index);
     if ((ch == CharIndexed.OUT_OF_BOUNDS)
diff --git a/libjava/classpath/gnu/regexp/RETokenBackRef.java b/libjava/classpath/gnu/regexp/RETokenBackRef.java
index 674822abd70..060a6cf7d20 100644
--- a/libjava/classpath/gnu/regexp/RETokenBackRef.java
+++ b/libjava/classpath/gnu/regexp/RETokenBackRef.java
@@ -51,13 +51,25 @@ final class RETokenBackRef extends REToken {
   // should implement getMinimumLength() -- any ideas?
 
     boolean match(CharIndexed input, REMatch mymatch) {
+	if (num >= mymatch.start.length) return false;
+	if (num >= mymatch.end.length) return false;
 	int b,e;
 	b = mymatch.start[num];
 	e = mymatch.end[num];
 	if ((b==-1)||(e==-1)) return false; // this shouldn't happen, but...
 	for (int i=b; i<e; i++) {
-	    if (input.charAt(mymatch.index+i-b) != input.charAt(i)) {
-		return false;
+	    char c1 = input.charAt(mymatch.index+i-b);
+	    char c2 = input.charAt(i);
+	    if (c1 != c2) {
+		if (insens) {
+		    if (c1 != Character.toLowerCase(c2) &&
+			c1 != Character.toUpperCase(c2)) {
+			return false;
+		    }
+		}
+		else {
+		    return false;
+		}
 	    }
 	}
 	mymatch.index += e-b;
diff --git a/libjava/classpath/gnu/regexp/RETokenChar.java b/libjava/classpath/gnu/regexp/RETokenChar.java
index a15449b2d96..5c087c68778 100644
--- a/libjava/classpath/gnu/regexp/RETokenChar.java
+++ b/libjava/classpath/gnu/regexp/RETokenChar.java
@@ -52,6 +52,10 @@ final class RETokenChar extends REToken {
     return ch.length;
   }
   
+  int getMaximumLength() {
+    return ch.length;
+  }
+  
     boolean match(CharIndexed input, REMatch mymatch) {
 	int z = ch.length;
 	char c;
@@ -68,7 +72,7 @@ final class RETokenChar extends REToken {
 
   // Overrides REToken.chain() to optimize for strings
   boolean chain(REToken next) {
-    if (next instanceof RETokenChar) {
+    if (next instanceof RETokenChar && ((RETokenChar)next).insens == insens) {
       RETokenChar cnext = (RETokenChar) next;
       // assume for now that next can only be one character
       int newsize = ch.length + cnext.ch.length;
diff --git a/libjava/classpath/gnu/regexp/RETokenEnd.java b/libjava/classpath/gnu/regexp/RETokenEnd.java
index 70483b746a9..788a964da41 100644
--- a/libjava/classpath/gnu/regexp/RETokenEnd.java
+++ b/libjava/classpath/gnu/regexp/RETokenEnd.java
@@ -49,6 +49,10 @@ final class RETokenEnd extends REToken {
     this.newline = newline;
   }
 
+  int getMaximumLength() {
+    return 0;
+  }
+
     boolean match(CharIndexed input, REMatch mymatch) {
 	char ch = input.charAt(mymatch.index);
 	if (ch == CharIndexed.OUT_OF_BOUNDS)
diff --git a/libjava/classpath/gnu/regexp/RETokenEndSub.java b/libjava/classpath/gnu/regexp/RETokenEndSub.java
index f3bb4f2e131..fe2969d0592 100644
--- a/libjava/classpath/gnu/regexp/RETokenEndSub.java
+++ b/libjava/classpath/gnu/regexp/RETokenEndSub.java
@@ -41,6 +41,10 @@ final class RETokenEndSub extends REToken {
     RETokenEndSub(int subIndex) {
 	super(subIndex);
     }
+
+    int getMaximumLength() {
+      return 0;
+    }
     
     boolean match(CharIndexed input, REMatch mymatch) {
 	mymatch.end[subIndex] = mymatch.index;
diff --git a/libjava/classpath/gnu/regexp/RETokenIndependent.java b/libjava/classpath/gnu/regexp/RETokenIndependent.java
new file mode 100644
index 00000000000..2eb14722361
--- /dev/null
+++ b/libjava/classpath/gnu/regexp/RETokenIndependent.java
@@ -0,0 +1,76 @@
+/* gnu/regexp/RETokenIndependent.java
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+
+/**
+ * @author Ito Kazumitsu
+ */
+final class RETokenIndependent extends REToken
+{
+  REToken re;
+
+  RETokenIndependent(REToken re) throws REException {
+    super(0);
+    this.re = re;
+  }
+
+  int getMinimumLength() {
+    return re.getMinimumLength();
+  }
+
+  int getMaximumLength() {
+    return re.getMaximumLength();
+  }
+
+  boolean match(CharIndexed input, REMatch mymatch)
+  {
+    if (re.match(input, mymatch)) {
+      // Once we have found a match, we do not see other possible matches.
+      mymatch.next = null;
+      return next(input, mymatch);
+    }
+    return false;
+  }
+
+    void dump(StringBuffer os) {
+	os.append("(?>");
+	re.dumpAll(os);
+	os.append(')');
+    }
+}
+
diff --git a/libjava/classpath/gnu/regexp/RETokenLookAhead.java b/libjava/classpath/gnu/regexp/RETokenLookAhead.java
index 33eaec9fac1..b44dfa50c4f 100644
--- a/libjava/classpath/gnu/regexp/RETokenLookAhead.java
+++ b/libjava/classpath/gnu/regexp/RETokenLookAhead.java
@@ -52,6 +52,10 @@ final class RETokenLookAhead extends REToken
     this.negative = negative;
   }
 
+  int getMaximumLength() {
+    return 0;
+  }
+
   boolean match(CharIndexed input, REMatch mymatch)
   {
     REMatch trymatch = (REMatch)mymatch.clone();
diff --git a/libjava/classpath/gnu/regexp/RETokenLookBehind.java b/libjava/classpath/gnu/regexp/RETokenLookBehind.java
new file mode 100644
index 00000000000..a6c1b34cb0b
--- /dev/null
+++ b/libjava/classpath/gnu/regexp/RETokenLookBehind.java
@@ -0,0 +1,116 @@
+/* gnu/regexp/RETokenLookBehind.java
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+
+/**
+ * @author Ito Kazumitsu
+ */
+final class RETokenLookBehind extends REToken
+{
+  REToken re;
+  boolean negative;
+
+  RETokenLookBehind(REToken re, boolean negative) throws REException {
+    super(0);
+    this.re = re;
+    this.negative = negative;
+  }
+
+  int getMaximumLength() {
+    return 0;
+  }
+
+  boolean match(CharIndexed input, REMatch mymatch)
+  {
+    int max = re.getMaximumLength();
+    CharIndexed behind = input.lookBehind(mymatch.index, max);
+    REMatch trymatch = (REMatch)mymatch.clone();
+    REMatch trymatch1 = (REMatch)mymatch.clone();
+    REMatch newMatch = null;
+    int curIndex = trymatch.index + behind.length() - input.length();
+    trymatch.index = 0;
+    RETokenMatchHereOnly stopper = new RETokenMatchHereOnly(curIndex);
+    REToken re1 = (REToken) re.clone();
+    re1.chain(stopper);
+    if (re1.match(behind, trymatch)) {
+      if (negative) return false;
+      if (next(input, trymatch1))
+        newMatch = trymatch1;
+    }
+
+    if (newMatch != null) {
+      if (negative) return false;
+      //else
+      mymatch.assignFrom(newMatch);
+      return true;
+    }
+    else { // no match
+      if (negative)
+        return next(input, mymatch);
+      //else
+      return false;
+    }
+  }
+
+    void dump(StringBuffer os) {
+	os.append("(?<");
+	os.append(negative ? '!' : '=');
+	re.dumpAll(os);
+	os.append(')');
+    }
+
+    private static class RETokenMatchHereOnly extends REToken {
+
+        int getMaximumLength() { return 0; }
+
+	private int index;
+
+	RETokenMatchHereOnly(int index) {
+	    super(0);
+	    this.index = index;
+	}
+
+	boolean match(CharIndexed input, REMatch mymatch) {
+	    return index == mymatch.index;
+	}
+
+        void dump(StringBuffer os) {}
+
+    }
+}
+
diff --git a/libjava/classpath/gnu/regexp/RETokenNamedProperty.java b/libjava/classpath/gnu/regexp/RETokenNamedProperty.java
new file mode 100644
index 00000000000..13c1e418a09
--- /dev/null
+++ b/libjava/classpath/gnu/regexp/RETokenNamedProperty.java
@@ -0,0 +1,301 @@
+/* gnu/regexp/RETokenNamedProperty.java
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+
+final class RETokenNamedProperty extends REToken {
+  String name;
+  boolean insens;
+  boolean negate;
+  Handler handler;
+
+  // Grouped properties
+  static final byte[] LETTER = new byte[]
+  { Character.LOWERCASE_LETTER,
+    Character.UPPERCASE_LETTER,
+    Character.TITLECASE_LETTER,
+    Character.MODIFIER_LETTER,
+    Character.OTHER_LETTER };
+  
+  static final byte[] MARK = new byte[]
+  { Character.NON_SPACING_MARK,
+    Character.COMBINING_SPACING_MARK,
+    Character.ENCLOSING_MARK };
+  
+  static final byte[] SEPARATOR = new byte[]
+  { Character.SPACE_SEPARATOR,
+    Character.LINE_SEPARATOR,
+    Character.PARAGRAPH_SEPARATOR };
+  
+  static final byte[] SYMBOL = new byte[]
+  { Character.MATH_SYMBOL,
+    Character.CURRENCY_SYMBOL,
+    Character.MODIFIER_SYMBOL,
+    Character.OTHER_SYMBOL };
+  
+  static final byte[] NUMBER = new byte[]
+  { Character.DECIMAL_DIGIT_NUMBER,
+    Character.LETTER_NUMBER,
+    Character.OTHER_NUMBER };
+  
+  static final byte[] PUNCTUATION = new byte[]
+  { Character.DASH_PUNCTUATION,
+    Character.START_PUNCTUATION,
+    Character.END_PUNCTUATION,
+    Character.CONNECTOR_PUNCTUATION,
+    Character.OTHER_PUNCTUATION,
+    Character.INITIAL_QUOTE_PUNCTUATION,
+    Character.FINAL_QUOTE_PUNCTUATION};
+  
+  static final byte[] OTHER = new byte[]
+  { Character.CONTROL,
+    Character.FORMAT,
+    Character.PRIVATE_USE,
+    Character.SURROGATE,
+    Character.UNASSIGNED };
+
+  RETokenNamedProperty(int subIndex, String name, boolean insens, boolean negate) throws REException {
+    super(subIndex);
+    this.name = name;
+    this.insens = insens;
+    this.negate = negate;
+    handler = getHandler(name); 
+  }
+
+    int getMinimumLength() {
+	return 1;
+    }
+
+    int getMaximumLength() {
+	return 1;
+    }
+
+    boolean match(CharIndexed input, REMatch mymatch) {
+      // Nothing can match once the input is exhausted, negated or not.
+      char current = input.charAt(mymatch.index);
+      if (current == CharIndexed.OUT_OF_BOUNDS) {
+        return false;
+      }
+      // In case-insensitive mode the upper- and lower-case forms of the
+      // character are tried as well (short-circuiting in that order).
+      boolean inProperty = handler.includes(current)
+        || (insens && (handler.includes(Character.toUpperCase(current))
+                       || handler.includes(Character.toLowerCase(current))));
+      // The negate flag inverts the outcome: fail when both agree.
+      if (inProperty == negate) {
+        return false;
+      }
+      // Consume the character and hand over to the rest of the pattern.
+      ++mymatch.index;
+      return next(input, mymatch);
+    }
+
+  void dump(StringBuffer os) {
+    // Emits \p{name}, or \P{name} when the negate flag is set.
+    os.append("\\").append(negate ? "P" : "p");
+    os.append("{").append(name).append("}");
+  }
+
+  private abstract static class Handler {
+      public abstract boolean includes(char c);
+  }
+  /** Maps a property name to the Handler that tests characters for it; throws REException if unknown. */
+  private Handler getHandler(String name) throws REException {
+      if (name.equals("Lower") || // POSIX-style class names, delegated to POSIXHandler
+          name.equals("Upper") ||
+          // name.equals("ASCII") ||
+          name.equals("Alpha") ||
+          name.equals("Digit") ||
+          name.equals("Alnum") ||
+          name.equals("Punct") ||
+          name.equals("Graph") ||
+          name.equals("Print") ||
+          name.equals("Blank") ||
+          name.equals("Cntrl") ||
+          name.equals("XDigit") ||
+          name.equals("Space") ) {
+         return new POSIXHandler(name);
+      }
+      if (name.startsWith("In")) { // "In" + Unicode block name
+	  try {
+	      name = name.substring(2);
+	      Character.UnicodeBlock block = Character.UnicodeBlock.forName(name);
+	      return new UnicodeBlockHandler(block);
+	  }
+	  catch (IllegalArgumentException e) {
+              throw new REException("Invalid Unicode block name: " + name, REException.REG_ESCAPE, 0);
+	  }
+      }
+      if (name.startsWith("Is")) { // optional "Is" prefix before a category name
+          name = name.substring(2);
+      }
+
+      // "grouped properties": one letter selecting several general categories
+      if (name.equals("L"))
+	  return new UnicodeCategoriesHandler(LETTER);
+      if (name.equals("M"))
+	  return new UnicodeCategoriesHandler(MARK);
+      if (name.equals("Z"))
+	  return new UnicodeCategoriesHandler(SEPARATOR);
+      if (name.equals("S"))
+	  return new UnicodeCategoriesHandler(SYMBOL);
+      if (name.equals("N"))
+	  return new UnicodeCategoriesHandler(NUMBER);
+      if (name.equals("P"))
+	  return new UnicodeCategoriesHandler(PUNCTUATION);
+      if (name.equals("C"))
+	  return new UnicodeCategoriesHandler(OTHER);
+      // single general categories, selected by two-letter abbreviation
+      if (name.equals("Mc"))
+          return new UnicodeCategoryHandler(Character.COMBINING_SPACING_MARK);
+      if (name.equals("Pc"))
+          return new UnicodeCategoryHandler(Character.CONNECTOR_PUNCTUATION);
+      if (name.equals("Cc"))
+          return new UnicodeCategoryHandler(Character.CONTROL);
+      if (name.equals("Sc"))
+          return new UnicodeCategoryHandler(Character.CURRENCY_SYMBOL);
+      if (name.equals("Pd"))
+          return new UnicodeCategoryHandler(Character.DASH_PUNCTUATION);
+      if (name.equals("Nd"))
+          return new UnicodeCategoryHandler(Character.DECIMAL_DIGIT_NUMBER);
+      if (name.equals("Me"))
+          return new UnicodeCategoryHandler(Character.ENCLOSING_MARK);
+      if (name.equals("Pe"))
+          return new UnicodeCategoryHandler(Character.END_PUNCTUATION);
+      if (name.equals("Pf"))
+          return new UnicodeCategoryHandler(Character.FINAL_QUOTE_PUNCTUATION);
+      if (name.equals("Cf"))
+          return new UnicodeCategoryHandler(Character.FORMAT);
+      if (name.equals("Pi"))
+          return new UnicodeCategoryHandler(Character.INITIAL_QUOTE_PUNCTUATION);
+      if (name.equals("Nl"))
+          return new UnicodeCategoryHandler(Character.LETTER_NUMBER);
+      if (name.equals("Zl"))
+          return new UnicodeCategoryHandler(Character.LINE_SEPARATOR);
+      if (name.equals("Ll"))
+          return new UnicodeCategoryHandler(Character.LOWERCASE_LETTER);
+      if (name.equals("Sm"))
+          return new UnicodeCategoryHandler(Character.MATH_SYMBOL);
+      if (name.equals("Lm"))
+          return new UnicodeCategoryHandler(Character.MODIFIER_LETTER);
+      if (name.equals("Sk"))
+          return new UnicodeCategoryHandler(Character.MODIFIER_SYMBOL);
+      if (name.equals("Mn"))
+          return new UnicodeCategoryHandler(Character.NON_SPACING_MARK);
+      if (name.equals("Lo"))
+          return new UnicodeCategoryHandler(Character.OTHER_LETTER);
+      if (name.equals("No"))
+          return new UnicodeCategoryHandler(Character.OTHER_NUMBER);
+      if (name.equals("Po"))
+          return new UnicodeCategoryHandler(Character.OTHER_PUNCTUATION);
+      if (name.equals("So"))
+          return new UnicodeCategoryHandler(Character.OTHER_SYMBOL);
+      if (name.equals("Zp"))
+          return new UnicodeCategoryHandler(Character.PARAGRAPH_SEPARATOR);
+      if (name.equals("Co"))
+          return new UnicodeCategoryHandler(Character.PRIVATE_USE);
+      if (name.equals("Zs"))
+          return new UnicodeCategoryHandler(Character.SPACE_SEPARATOR);
+      if (name.equals("Ps"))
+          return new UnicodeCategoryHandler(Character.START_PUNCTUATION);
+      if (name.equals("Cs"))
+          return new UnicodeCategoryHandler(Character.SURROGATE);
+      if (name.equals("Lt"))
+          return new UnicodeCategoryHandler(Character.TITLECASE_LETTER);
+      if (name.equals("Cn"))
+          return new UnicodeCategoryHandler(Character.UNASSIGNED);
+      if (name.equals("Lu"))
+          return new UnicodeCategoryHandler(Character.UPPERCASE_LETTER);
+      throw new REException("unsupported name " + name, REException.REG_ESCAPE, 0); // a leading "Is" has already been stripped from name here
+  }
+
+  private static class POSIXHandler extends Handler {
+      // One-character scratch input and match object, reused by every call.
+      private char[] chars = new char[1];
+      private CharIndexedCharArray ca = new CharIndexedCharArray(chars, 0);
+      private REMatch mymatch = new REMatch(0,0,0);
+      private RETokenPOSIX retoken;
+
+      public POSIXHandler(String name) {
+          int id = RETokenPOSIX.intValue(name.toLowerCase());
+          if (id == -1) throw new RuntimeException("Unknown posix ID: " + name);
+          retoken = new RETokenPOSIX(0, id, false, false);
+      }
+      public boolean includes(char c) {
+          mymatch.index = 0;
+          chars[0] = c;
+          return retoken.match(ca, mymatch);
+      }
+  }
+
+  private static class UnicodeCategoryHandler extends Handler {
+      // General category value, compared with Character.getType(char).
+      private int category;
+      public UnicodeCategoryHandler(byte category) { this.category = category; }
+      public boolean includes(char c) {
+          int type = Character.getType(c);
+          return category == type;
+      }
+  }
+
+  private static class UnicodeCategoriesHandler extends Handler {
+      // General category values accepted by this handler.
+      private byte[] categories;
+      public UnicodeCategoriesHandler(byte[] categories) {
+          this.categories = categories;
+      }
+      public boolean includes(char c) {
+          int wanted = Character.getType(c);
+          int i = 0;
+          while (i < categories.length && categories[i] != wanted) i++;
+          return i < categories.length;
+      }
+  }
+
+  private static class UnicodeBlockHandler extends Handler {
+      private Character.UnicodeBlock block;
+      public UnicodeBlockHandler(Character.UnicodeBlock block) { this.block = block; }
+
+      public boolean includes(char c) {
+          // Character.UnicodeBlock.of yields null for a character in no block.
+          Character.UnicodeBlock found = Character.UnicodeBlock.of(c);
+          return found == null ? false : found.equals(block);
+      }
+  }
+
+}
diff --git a/libjava/classpath/gnu/regexp/RETokenOneOf.java b/libjava/classpath/gnu/regexp/RETokenOneOf.java
index 3f6e89e2103..260bc4b8f67 100644
--- a/libjava/classpath/gnu/regexp/RETokenOneOf.java
+++ b/libjava/classpath/gnu/regexp/RETokenOneOf.java
@@ -1,5 +1,5 @@
 /* gnu/regexp/RETokenOneOf.java
-   Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -37,11 +37,35 @@ exception statement from your version. */
 
 package gnu.regexp;
 import java.util.Vector;
+import java.util.Stack;
 
 final class RETokenOneOf extends REToken {
   private Vector options;
   private boolean negative;
 
+  private Vector addition;
+  // This Vector addition is used to store nested character classes.
+  // For example, if the original expression is
+  //    [2-7a-c[f-k][m-z]&&[^p-v][st]]
+  // the basic part /2-7a-c/ is stored in the Vector options, and
+  // the additional part /[f-k][m-z]&&[^p-v][st]/ is stored in the
+  // Vector addition in the following order (Reverse Polish Notation):
+  //           -- The matching result of the basic part is assumed here. 
+  //    [f-k]  -- REToken
+  //    "|"    -- or
+  //    [m-z]  -- REToken
+  //    "|"    -- or
+  //    false
+  //    [^p-v] -- REToken
+  //    "|"    -- or
+  //    [st]   -- REToken
+  //    "|"    -- or
+  //    "&"    -- and
+  //
+  // As is clear from the explanation above, the Vector addition is
+  // effective only when this REToken originates from a character class
+  // expression.
+
   // This constructor is used for convenience when we know the set beforehand,
   // e.g. \d --> new RETokenOneOf("0123456789",false, ..)
   //      \D --> new RETokenOneOf("0123456789",true, ..)
@@ -60,7 +84,17 @@ final class RETokenOneOf extends REToken {
     this.negative = negative;
   }
 
+  RETokenOneOf(int subIndex, Vector options, Vector addition, boolean negative) {
+    super(subIndex);
+    this.options = options;
+    this.addition = addition;
+    this.negative = negative;
+  }
+
   int getMinimumLength() {
+    // (negative || addition != null) occurs when this token originates from
+    // character class expression.
+    if (negative || addition != null) return 1;
     int min = Integer.MAX_VALUE;
     int x;
     for (int i=0; i < options.size(); i++) {
@@ -70,54 +104,123 @@ final class RETokenOneOf extends REToken {
     return min;
   }
 
+  int getMaximumLength() {
+    // A negated token, or one carrying nested classes in 'addition', comes
+    // from a character class expression, so its length is 1.
+    if (negative || addition != null) return 1;
+    // Otherwise report the largest maximum length among the options.
+    int longest = 0;
+    final int count = options.size();
+    for (int k = 0; k < count; k++) {
+      longest = Math.max(longest, ((REToken) options.elementAt(k)).getMaximumLength());
+    }
+    return longest;
+  }
+
     boolean match(CharIndexed input, REMatch mymatch) {
-    if (negative && (input.charAt(mymatch.index) == CharIndexed.OUT_OF_BOUNDS)) 
+      REMatch tryMatch;
+      boolean tryOnly;
+      if (addition == null) {
+	  tryMatch = mymatch;
+	  tryOnly = false;
+      }
+      else {
+	  tryMatch = (REMatch) mymatch.clone();
+	  tryOnly = true;
+      }
+      boolean b = negative ?
+        matchN(input, tryMatch, tryOnly) :
+        matchP(input, tryMatch, tryOnly);
+      if (addition == null) return b;
+
+      Stack stack = new Stack();
+      stack.push(new Boolean(b));
+      for (int i=0; i < addition.size(); i++) {
+	Object obj = addition.elementAt(i);
+	if (obj instanceof REToken) {
+	  b = ((REToken)obj).match(input, (REMatch)mymatch.clone());
+	  stack.push(new Boolean(b));
+	}
+	else if (obj instanceof Boolean) {
+	  stack.push(obj);
+	}
+	else if (obj.equals("|")) {
+	  b = ((Boolean)stack.pop()).booleanValue();
+	  b = ((Boolean)stack.pop()).booleanValue() || b;
+	  stack.push(new Boolean(b));
+	}
+	else if (obj.equals("&")) {
+	  b = ((Boolean)stack.pop()).booleanValue();
+	  b = ((Boolean)stack.pop()).booleanValue() && b;
+	  stack.push(new Boolean(b));
+	}
+	else {
+	  throw new RuntimeException("Invalid object found");
+	}
+      }
+      b = ((Boolean)stack.pop()).booleanValue();
+      if (b) {
+        ++mymatch.index;
+        return next(input, mymatch);
+      }
       return false;
+    }
 
-    REMatch newMatch = null;
-    REMatch last = null;
-    REToken tk;
-    boolean isMatch;
-    for (int i=0; i < options.size(); i++) {
+    private boolean matchN(CharIndexed input, REMatch mymatch, boolean tryOnly) {
+      if (input.charAt(mymatch.index) == CharIndexed.OUT_OF_BOUNDS) 
+        return false;
+
+      REMatch newMatch = null;
+      REMatch last = null;
+      REToken tk;
+      for (int i=0; i < options.size(); i++) {
 	tk = (REToken) options.elementAt(i);
 	REMatch tryMatch = (REMatch) mymatch.clone();
 	if (tk.match(input, tryMatch)) { // match was successful
-	    if (negative) return false;
-
-	    if (next(input, tryMatch)) {
-		// Add tryMatch to list of possibilities.
-		if (last == null) {
-		    newMatch = tryMatch;
-		    last = tryMatch;
-		} else {
-		    last.next = tryMatch;
-		    last = tryMatch;
-		}
-	    } // next succeeds
-	} // is a match
-    } // try next option
-
-    if (newMatch != null) {
-	if (negative) {
 	    return false;
-	} else {
-	    // set contents of mymatch equal to newMatch
+	} // is a match
+      } // try next option
 
-	    // try each one that matched
-	    mymatch.assignFrom(newMatch);
-	    return true;
-	}
-    } else {
-	if (negative) {
-	    ++mymatch.index;
-	    return next(input, mymatch);
-	} else {
-	    return false;
-	}
+      if (tryOnly) return true;
+      ++mymatch.index;
+      return next(input, mymatch);
     }
 
-    // index+1 works for [^abc] lists, not for generic lookahead (--> index)
-  }
+    private boolean matchP(CharIndexed input, REMatch mymatch, boolean tryOnly) {
+      // Tries the options in order, on clones of the token and of mymatch.
+      // Without MF_FIND_ALL the first successful option ends the search.
+      final boolean firstOnly =
+        (mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0;
+      REMatch.REMatchList found = new REMatch.REMatchList();
+      for (int k = 0; k < options.size(); k++) {
+        // Backtracking needs each option chained to the next token, but
+        // chain() has side effects, so a clone is chained instead.
+        REToken option = (REToken) ((REToken) options.elementAt(k)).clone();
+        if (!tryOnly) {
+          option.chain(this.next);
+          option.setUncle(this.uncle);
+          option.subIndex = this.subIndex;
+        }
+        REMatch attempt = (REMatch) mymatch.clone();
+        if (!option.match(input, attempt)) {
+          continue; // this option failed, try the next one
+        }
+        if (tryOnly) {
+          return true; // a plain yes/no answer was requested
+        }
+        found.addTail(attempt);
+        if (firstOnly) {
+          break;
+        }
+      }
+      // In try-only mode reaching this point means that no option matched.
+      if (tryOnly || found.head == null) {
+        return false;
+      }
+      // Set the contents of mymatch from the collected matches.
+      mymatch.assignFrom(found.head);
+      return true;
+    }
 
   void dump(StringBuffer os) {
     os.append(negative ? "[^" : "(?:");
diff --git a/libjava/classpath/gnu/regexp/RETokenPOSIX.java b/libjava/classpath/gnu/regexp/RETokenPOSIX.java
index bbb8066bca8..4182c6fab98 100644
--- a/libjava/classpath/gnu/regexp/RETokenPOSIX.java
+++ b/libjava/classpath/gnu/regexp/RETokenPOSIX.java
@@ -81,6 +81,10 @@ final class RETokenPOSIX extends REToken {
 	return 1;
     }
 
+    int getMaximumLength() {
+	return 1;
+    }
+
     boolean match(CharIndexed input, REMatch mymatch) {
     char ch = input.charAt(mymatch.index);
     if (ch == CharIndexed.OUT_OF_BOUNDS)
diff --git a/libjava/classpath/gnu/regexp/RETokenRange.java b/libjava/classpath/gnu/regexp/RETokenRange.java
index dadaf2d8072..8a1ac86b212 100644
--- a/libjava/classpath/gnu/regexp/RETokenRange.java
+++ b/libjava/classpath/gnu/regexp/RETokenRange.java
@@ -43,19 +43,32 @@ final class RETokenRange extends REToken {
 
   RETokenRange(int subIndex, char lo, char hi, boolean ins) {
     super(subIndex);
-    this.lo = (insens = ins) ? Character.toLowerCase(lo) : lo;
-    this.hi = ins ? Character.toLowerCase(hi) : hi;
+    insens = ins;
+    this.lo = lo;
+    this.hi = hi;
   }
 
   int getMinimumLength() {
     return 1;
   }
 
+  int getMaximumLength() {
+    return 1;
+  }
+
     boolean match(CharIndexed input, REMatch mymatch) {
 	char c = input.charAt(mymatch.index);
 	if (c == CharIndexed.OUT_OF_BOUNDS) return false;
-	if (insens) c = Character.toLowerCase(c);
-	if ((c >= lo) && (c <= hi)) {
+	boolean matches = (c >= lo) && (c <= hi);
+	if (! matches && insens) {
+	  char c1 = Character.toLowerCase(c);
+	  matches = (c1 >= lo) && (c1 <= hi);
+	  if (!matches) {
+	    c1 = Character.toUpperCase(c);
+	    matches = (c1 >= lo) && (c1 <= hi);
+	  }
+	}
+	if (matches) {
 	    ++mymatch.index;
 	    return next(input, mymatch);
 	}
diff --git a/libjava/classpath/gnu/regexp/RETokenRepeated.java b/libjava/classpath/gnu/regexp/RETokenRepeated.java
index 6291a3c3960..2d019c53cbd 100644
--- a/libjava/classpath/gnu/regexp/RETokenRepeated.java
+++ b/libjava/classpath/gnu/regexp/RETokenRepeated.java
@@ -1,5 +1,5 @@
 /* gnu/regexp/RETokenRepeated.java
-   Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+   Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
 
 This file is part of GNU Classpath.
 
@@ -39,6 +39,7 @@ exception statement from your version. */
 package gnu.regexp;
 
 import java.util.Vector;
+import java.util.Arrays;
 
 final class RETokenRepeated extends REToken {
     private REToken token;
@@ -82,6 +83,38 @@ final class RETokenRepeated extends REToken {
 	return (min * token.getMinimumLength());
     }
 
+    int getMaximumLength() {
+	int tmax = token.getMaximumLength();
+	if (max == Integer.MAX_VALUE || tmax == Integer.MAX_VALUE) return Integer.MAX_VALUE;
+	// Multiply in long and saturate: an int product of two large bounds
+	// could overflow and come out negative or too small.
+	return (int) Math.min((long) max * tmax, (long) Integer.MAX_VALUE);
+    }
+
+    private static REMatch findDoables(REToken tk,
+			CharIndexed input, REMatch mymatch) {
+	// For every start position in the mymatch chain, try one more
+	// repetition of the token and collect whatever matches.
+	REMatch.REMatchList collected = new REMatch.REMatchList();
+	for (REMatch start = mymatch; start != null; start = start.next) {
+	    REMatch attempt = (REMatch) start.clone();
+	    final int origin = attempt.index;
+	    // Use a copy of the token detached from next/uncle, and request
+	    // all matches for the duration of this attempt.
+	    tk = (REToken) tk.clone();
+	    tk.uncle = null;
+	    tk.next = null;
+	    attempt.matchFlags |= REMatch.MF_FIND_ALL;
+	    if (tk.match(input, attempt)) {
+		for (REMatch m = attempt; m != null; m = m.next)
+		    m.matchFlags &= ~REMatch.MF_FIND_ALL;
+		// Flag a match that left the index where it started.
+		if (attempt.index == origin) attempt.empty = true;
+		collected.addTail(attempt);
+	    }
+	}
+	return collected.head;
+    }
+
     // We do need to save every possible point, but the number of clone()
     // invocations here is really a killer for performance on non-stingy
     // repeat operators.  I'm open to suggestions...
@@ -91,59 +124,167 @@ final class RETokenRepeated extends REToken {
     // the subexpression back-reference operator allow that?
 
     boolean match(CharIndexed input, REMatch mymatch) {
-	// number of times we've matched so far
-	int numRepeats = 0; 
-	
-	// Possible positions for the next repeat to match at
-	REMatch newMatch = mymatch;
-	REMatch last = null;
-	REMatch current;
 
-	// Add the '0-repeats' index
-	// positions.elementAt(z) == position [] in input after <<z>> matches
-	Vector positions = new Vector();
-	positions.addElement(newMatch);
-	
-	// Declare variables used in loop
-	REMatch doables;
-	REMatch doablesLast;
-	REMatch recurrent;
-	int lastIndex = mymatch.index;
-
-	do {
-	    // Check for stingy match for each possibility.
-	    if (stingy && (numRepeats >= min)) {
-		REMatch result = matchRest(input, newMatch);
-		if (result != null) {
-		    mymatch.assignFrom(result);
-		    return true;
-		}
+        boolean stopMatchingIfSatisfied =
+		(mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0;
+
+	REMatch newMatch = matchMinimum(input, mymatch);
+	if (newMatch == null) return false;
+
+	// Array of positions we have already visited
+	int[] visited = initVisited();
+	for (REMatch m = newMatch; m != null; m = m.next) {
+	    visited = addVisited(m.index, visited);
+	}
+
+	int max1 = decreaseMax(max, min);
+
+	newMatch = _match(input, newMatch, max1,
+	    stopMatchingIfSatisfied, visited);
+	if (newMatch != null) {
+	    mymatch.assignFrom(newMatch);
+	    return true;
+	}
+	return false;
+    }
+
+    private static int decreaseMax(int m, int n) {
+	// Integer.MAX_VALUE is passed through unchanged; anything else shrinks by n.
+	return (m == Integer.MAX_VALUE) ? Integer.MAX_VALUE : m - n;
+    }
+
+    // Array visited is an array of character positions we have already
+    // visited. visited[0] is used to store the effective length of the
+    // array.
+    private static int[] initVisited() {
+	// Slot 0 holds the number of stored positions; a fresh int array is
+	// zero-filled, so the returned set starts out empty.
+	return new int[32];
+    }
+
+    private static boolean visitedContains(int n, int[] visited) {
+	// visited[0] is the number of stored positions, which occupy slots
+	// 1..visited[0] inclusive; linear search is fine for so small an array.
+	for (int i = 1; i <= visited[0]; i++) {
+	    if (n == visited[i]) return true;
+	}
+	return false;
+    }
+
+    private static int[] addVisited(int n, int[] visited) {
+	if (visitedContains(n, visited)) return visited;
+	if (visited[0] + 1 >= visited.length) { // next slot would fall off the end
+	    int[] grown = new int[visited.length + 32];
+	    System.arraycopy(visited, 0, grown, 0, visited.length);
+	    visited = grown;
+	}
+	int slot = ++visited[0];
+	visited[slot] = n;
+	return visited;
+    }
+
+    private REMatch _match(CharIndexed input, REMatch mymatch,
+    	    int max1, boolean stopMatchingIfSatisfied,
+	    int[] visited) {
+
+        if (max1 == 0) {
+	    return matchRest(input, mymatch);
+	}
+	max1 = decreaseMax(max1, 1);
+
+	REMatch.REMatchList allResults = new REMatch.REMatchList();
+
+	// Depth-first search
+
+	for (REMatch cur = mymatch; cur != null; cur = cur.next) {
+
+	    REMatch cur1 = (REMatch) cur.clone();
+
+	    if (stingy) {
+	        REMatch results = matchRest(input, cur1);
+	        if (results != null) {
+	            if (stopMatchingIfSatisfied) {
+		        return results;
+		    }
+		    allResults.addTail(results);
+	        }
 	    }
 
-	    doables = null;
-	    doablesLast = null;
+	    DO_THIS:
+	    do {
 
-	    // try next repeat at all possible positions
-	    for (current = newMatch; current != null; current = current.next) {
-		recurrent = (REMatch) current.clone();
-		if (token.match(input, recurrent)) {
-		    // add all items in current to doables array
-		    if (doables == null) {
-			doables = recurrent;
-			doablesLast = recurrent;
-		    } else {
-			// Order these from longest to shortest
-			// Start by assuming longest (more repeats)
-			doablesLast.next = recurrent;
+	    boolean emptyMatchFound = false;
+	    REMatch doables = findDoables(token, input, cur1);
+	    if (doables == null) break DO_THIS;
+	    if (doables.empty) emptyMatchFound = true;
+
+	    if (!emptyMatchFound) {
+	        REMatch.REMatchList list = new REMatch.REMatchList();
+	        for (REMatch m = doables; m != null; m = m.next) {
+	            REMatch m1 = (REMatch) m.clone();
+		    int n = m1.index;
+		    if (! visitedContains(n, visited)) {
+		        visited = addVisited(n, visited);
+		        list.addTail(m1);
 		    }
-		    // Find new doablesLast
-		    while (doablesLast.next != null) {
-			doablesLast = doablesLast.next;
+	        }
+	        if (list.head == null) break DO_THIS;
+	        doables = list.head;
+	    }
+
+	    for (REMatch m = doables; m != null; m = m.next) {
+	        if (! emptyMatchFound) {
+	            REMatch m1 = _match(input, m, max1,
+		        stopMatchingIfSatisfied, visited);
+		    if (possessive) return m1;
+		    if (m1 != null) {
+	                if (stopMatchingIfSatisfied) {
+		            return m1;
+		        }
+		        allResults.addTail(m1);
+		    }
+	        }
+		else {
+		    REMatch m1 = matchRest(input, m);
+		    if (m1 != null) {
+		        if (stopMatchingIfSatisfied) {
+		            return m1;
+		        }
+		        allResults.addTail(m1);
 		    }
 		}
 	    }
-	    // if none of the possibilities worked out, break out of do/while
-	    if (doables == null) break;
+
+	    } while (false); // DO_THIS only once;
+
+	    // This point itself is a candidate.
+	    if (!stingy) {
+		REMatch m2 = matchRest(input, cur1);
+		if (m2 != null) {
+		    if (stopMatchingIfSatisfied) {
+		        return m2;
+		    }
+		    allResults.addTail(m2);
+	        }
+	    }
+	}
+
+        return allResults.head;
+    }
+
+    private REMatch matchMinimum(CharIndexed input, final REMatch mymatch) {
+	// Possible positions for the next repeat to match at
+	REMatch newMatch = mymatch;
+
+	// number of times we've matched so far
+	int numRepeats = 0; 
+	
+	while (numRepeats < min) {
+	    REMatch doables = findDoables(token, input, newMatch);
+
+	    // if none of the possibilities worked out, 
+	    // it means that minimum number of repeats could not be found.
+	    if (doables == null) return null;
 	    
 	    // reassign where the next repeat can match
 	    newMatch = doables;
@@ -151,91 +292,24 @@ final class RETokenRepeated extends REToken {
 	    // increment how many repeats we've successfully found
 	    ++numRepeats;
 	    
-	    positions.addElement(newMatch);
-
-	    // doables.index == lastIndex means an empty string
-	    // was the longest that matched this token.
-	    // We break here, otherwise we will fall into an endless loop.
-	    if (doables.index == lastIndex) {
-		if (numRepeats < min) numRepeats = min;
-		break;
-	    }		
-	    lastIndex = doables.index;
-	} while (numRepeats < max);
-	
-	// If there aren't enough repeats, then fail
-	if (numRepeats < min) return false;
-	
-	// We're greedy, but ease off until a true match is found 
-	int posIndex = positions.size();
-	
-	// At this point we've either got too many or just the right amount.
-	// See if this numRepeats works with the rest of the regexp.
-	REMatch allResults = null;
-	REMatch allResultsLast = null;
-
-	REMatch results = null;
-	int indexCount = posIndex - min;
-	if (indexCount <= 0) {
-	    // This case occurs when we exited the previous do loop before
-	    // numRepeats >= min because an empty string matched the token.
-	    // In this case, an empty string can match as many times as
-	    // desired.
-	    indexCount = 1;
-	}
-	while (indexCount-- > 0) {
-	    --posIndex;
-	    newMatch = (REMatch) positions.elementAt(posIndex);
-	    results = matchRest(input, newMatch);
-	    if (results != null) {
-		if (allResults == null) {
-		    allResults = results;
-		    allResultsLast = results;
-		} else {
-		    // Order these from longest to shortest
-		    // Start by assuming longest (more repeats)
-		    allResultsLast.next = results;
-		}
-		// Find new doablesLast
-		while (allResultsLast.next != null) {
-		    allResultsLast = allResultsLast.next;
-		}
-	    }
-	    // else did not match rest of the tokens, try again on smaller sample
-	    // or break out when performing possessive matching
-	    if (possessive) break;
+	    if (newMatch.empty) break;
 	}
-	if (allResults != null) {
-	    mymatch.assignFrom(allResults); // does this get all?
-	    return true;
-	}
-	// If we fall out, no matches.
-	return false;
+	return newMatch;
     }
 
     private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
 	REMatch current, single;
-	REMatch doneIndex = null;
-	REMatch doneIndexLast = null;
+	REMatch.REMatchList doneIndex = new REMatch.REMatchList();
 	// Test all possible matches for this number of repeats
 	for (current = newMatch; current != null; current = current.next) {
 	    // clone() separates a single match from the chain
 	    single = (REMatch) current.clone();
 	    if (next(input, single)) {
 		// chain results to doneIndex
-		if (doneIndex == null) {
-		    doneIndex = single;
-		    doneIndexLast = single;
-		} else {
-		    doneIndexLast.next = single;
-		}
-		// Find new doneIndexLast
-		while (doneIndexLast.next != null) {
-		    doneIndexLast = doneIndexLast.next;
-		}
+		doneIndex.addTail(single);
 	    }
 	}
-	return doneIndex;
+	return doneIndex.head;
     }
 
     void dump(StringBuffer os) {
diff --git a/libjava/classpath/gnu/regexp/RETokenStart.java b/libjava/classpath/gnu/regexp/RETokenStart.java
index 8f7198237e1..42e3c0b2de0 100644
--- a/libjava/classpath/gnu/regexp/RETokenStart.java
+++ b/libjava/classpath/gnu/regexp/RETokenStart.java
@@ -44,6 +44,10 @@ class RETokenStart extends REToken {
 	super(subIndex);
 	this.newline = newline;
     }
+
+    int getMaximumLength() {
+        return 0;
+    }
     
     boolean match(CharIndexed input, REMatch mymatch) {
 	// charAt(index-n) may be unknown on a Reader/InputStream. FIXME
diff --git a/libjava/classpath/gnu/regexp/RETokenWordBoundary.java b/libjava/classpath/gnu/regexp/RETokenWordBoundary.java
index 6804151e261..f86214bbf68 100644
--- a/libjava/classpath/gnu/regexp/RETokenWordBoundary.java
+++ b/libjava/classpath/gnu/regexp/RETokenWordBoundary.java
@@ -52,6 +52,11 @@ final class RETokenWordBoundary extends REToken {
 	this.where = where;
 	this.negated = negated;
     }
+
+    int getMaximumLength() {
+        return 0;
+    }
+
     
     boolean match(CharIndexed input, REMatch mymatch) {
 	// Word boundary means input[index-1] was a word character