diff options
Diffstat (limited to 'libjava/classpath/gnu/java/util/regex/RESyntax.java')
-rw-r--r-- | libjava/classpath/gnu/java/util/regex/RESyntax.java | 369 |
1 files changed, 171 insertions, 198 deletions
diff --git a/libjava/classpath/gnu/java/util/regex/RESyntax.java b/libjava/classpath/gnu/java/util/regex/RESyntax.java index db11e2db450..38d70564d4a 100644 --- a/libjava/classpath/gnu/java/util/regex/RESyntax.java +++ b/libjava/classpath/gnu/java/util/regex/RESyntax.java @@ -51,102 +51,104 @@ import java.util.BitSet; * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A> */ -public final class RESyntax implements Serializable { - static final String DEFAULT_LINE_SEPARATOR = System.getProperty("line.separator"); +public final class RESyntax implements Serializable +{ + static final String DEFAULT_LINE_SEPARATOR = + System.getProperty ("line.separator"); - private BitSet bits; + private BitSet bits; - // true for the constant defined syntaxes - private boolean isFinal = false; + // true for the constant defined syntaxes + private boolean isFinal = false; - private String lineSeparator = DEFAULT_LINE_SEPARATOR; + private String lineSeparator = DEFAULT_LINE_SEPARATOR; // Values for constants are bit indexes /** * Syntax bit. Backslash is an escape character in lists. */ - public static final int RE_BACKSLASH_ESCAPE_IN_LISTS = 0; + public static final int RE_BACKSLASH_ESCAPE_IN_LISTS = 0; /** * Syntax bit. Use \? instead of ? and \+ instead of +. */ - public static final int RE_BK_PLUS_QM = 1; + public static final int RE_BK_PLUS_QM = 1; /** * Syntax bit. POSIX character classes ([:...:]) in lists are allowed. */ - public static final int RE_CHAR_CLASSES = 2; + public static final int RE_CHAR_CLASSES = 2; /** * Syntax bit. ^ and $ are special everywhere. * <B>Not implemented.</B> */ - public static final int RE_CONTEXT_INDEP_ANCHORS = 3; + public static final int RE_CONTEXT_INDEP_ANCHORS = 3; /** * Syntax bit. Repetition operators are only special in valid positions. * <B>Not implemented.</B> */ - public static final int RE_CONTEXT_INDEP_OPS = 4; + public static final int RE_CONTEXT_INDEP_OPS = 4; /** * Syntax bit. Repetition and alternation operators are invalid * at start and end of pattern and other places. * <B>Not implemented</B>. */ - public static final int RE_CONTEXT_INVALID_OPS = 5; + public static final int RE_CONTEXT_INVALID_OPS = 5; /** * Syntax bit. Match-any-character operator (.) matches a newline. */ - public static final int RE_DOT_NEWLINE = 6; + public static final int RE_DOT_NEWLINE = 6; /** * Syntax bit. Match-any-character operator (.) does not match a null. */ - public static final int RE_DOT_NOT_NULL = 7; + public static final int RE_DOT_NOT_NULL = 7; /** * Syntax bit. Intervals ({x}, {x,}, {x,y}) are allowed. */ - public static final int RE_INTERVALS = 8; + public static final int RE_INTERVALS = 8; /** * Syntax bit. No alternation (|), match one-or-more (+), or * match zero-or-one (?) operators. */ - public static final int RE_LIMITED_OPS = 9; + public static final int RE_LIMITED_OPS = 9; /** * Syntax bit. Newline is an alternation operator. */ - public static final int RE_NEWLINE_ALT = 10; // impl. + public static final int RE_NEWLINE_ALT = 10; // impl. /** * Syntax bit. Intervals use { } instead of \{ \} */ - public static final int RE_NO_BK_BRACES = 11; + public static final int RE_NO_BK_BRACES = 11; /** * Syntax bit. Grouping uses ( ) instead of \( \). */ - public static final int RE_NO_BK_PARENS = 12; + public static final int RE_NO_BK_PARENS = 12; /** * Syntax bit. Backreferences not allowed. */ - public static final int RE_NO_BK_REFS = 13; + public static final int RE_NO_BK_REFS = 13; /** * Syntax bit. Alternation uses | instead of \| */ - public static final int RE_NO_BK_VBAR = 14; + public static final int RE_NO_BK_VBAR = 14; /** * Syntax bit. <B>Not implemented</B>. */ - public static final int RE_NO_EMPTY_RANGES = 15; + public static final int RE_NO_EMPTY_RANGES = 15; /** * Syntax bit. An unmatched right parenthesis (')' or '\)', depending @@ -157,80 +159,80 @@ public final class RESyntax implements Serializable { /** * Syntax bit. <B>Not implemented.</B> */ - public static final int RE_HAT_LISTS_NOT_NEWLINE = 17; + public static final int RE_HAT_LISTS_NOT_NEWLINE = 17; /** * Syntax bit. Stingy matching is allowed (+?, *?, ??, {x,y}?). */ - public static final int RE_STINGY_OPS = 18; + public static final int RE_STINGY_OPS = 18; /** * Syntax bit. Allow character class escapes (\d, \D, \s, \S, \w, \W). */ - public static final int RE_CHAR_CLASS_ESCAPES = 19; + public static final int RE_CHAR_CLASS_ESCAPES = 19; /** * Syntax bit. Allow use of (?:xxx) grouping (subexpression is not saved). */ - public static final int RE_PURE_GROUPING = 20; + public static final int RE_PURE_GROUPING = 20; /** * Syntax bit. Allow use of (?=xxx) and (?!xxx) apply the subexpression * to the text following the current position without consuming that text. */ - public static final int RE_LOOKAHEAD = 21; + public static final int RE_LOOKAHEAD = 21; /** * Syntax bit. Allow beginning- and end-of-string anchors (\A, \Z). */ - public static final int RE_STRING_ANCHORS = 22; + public static final int RE_STRING_ANCHORS = 22; /** * Syntax bit. Allow embedded comments, (?#comment), as in Perl5. */ - public static final int RE_COMMENTS = 23; + public static final int RE_COMMENTS = 23; /** * Syntax bit. Allow character class escapes within lists, as in Perl5. */ - public static final int RE_CHAR_CLASS_ESC_IN_LISTS = 24; + public static final int RE_CHAR_CLASS_ESC_IN_LISTS = 24; /** * Syntax bit. Possessive matching is allowed (++, *+, ?+, {x,y}+). */ - public static final int RE_POSSESSIVE_OPS = 25; + public static final int RE_POSSESSIVE_OPS = 25; /** * Syntax bit. Allow embedded flags, (?is-x), as in Perl5. */ - public static final int RE_EMBEDDED_FLAGS = 26; + public static final int RE_EMBEDDED_FLAGS = 26; /** * Syntax bit. Allow octal char (\0377), as in Perl5. */ - public static final int RE_OCTAL_CHAR = 27; + public static final int RE_OCTAL_CHAR = 27; /** * Syntax bit. Allow hex char (\x1b), as in Perl5. */ - public static final int RE_HEX_CHAR = 28; + public static final int RE_HEX_CHAR = 28; /** * Syntax bit. Allow Unicode char (\u1234), as in Java 1.4. */ - public static final int RE_UNICODE_CHAR = 29; + public static final int RE_UNICODE_CHAR = 29; /** * Syntax bit. Allow named property (\p{P}, \P{p}), as in Perl5. */ - public static final int RE_NAMED_PROPERTY = 30; + public static final int RE_NAMED_PROPERTY = 30; /** * Syntax bit. Allow nested characterclass ([a-z&&[^p-r]]), as in Java 1.4. */ - public static final int RE_NESTED_CHARCLASS = 31; + public static final int RE_NESTED_CHARCLASS = 31; - private static final int BIT_TOTAL = 32; + private static final int BIT_TOTAL = 32; /** * Predefined syntax. @@ -315,13 +317,13 @@ public final class RESyntax implements Serializable { * Emulates regular expression support in Larry Wall's perl, version 4, * using single line mode (/s modifier). */ - public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s) + public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s) /** * Predefined syntax. * Emulates regular expression support in Larry Wall's perl, version 5. */ - public static final RESyntax RE_SYNTAX_PERL5; + public static final RESyntax RE_SYNTAX_PERL5; /** * Predefined syntax. @@ -335,145 +337,109 @@ public final class RESyntax implements Serializable { * Emulates regular expression support in Java 1.4's java.util.regex * package. */ - public static final RESyntax RE_SYNTAX_JAVA_1_4; - - static { - // Define syntaxes - - RE_SYNTAX_EMACS = new RESyntax().makeFinal(); - - RESyntax RE_SYNTAX_POSIX_COMMON = new RESyntax() - .set(RE_CHAR_CLASSES) - .set(RE_DOT_NEWLINE) - .set(RE_DOT_NOT_NULL) - .set(RE_INTERVALS) - .set(RE_NO_EMPTY_RANGES) - .makeFinal(); - - RE_SYNTAX_POSIX_BASIC = new RESyntax(RE_SYNTAX_POSIX_COMMON) - .set(RE_BK_PLUS_QM) - .makeFinal(); - - RE_SYNTAX_POSIX_EXTENDED = new RESyntax(RE_SYNTAX_POSIX_COMMON) - .set(RE_CONTEXT_INDEP_ANCHORS) - .set(RE_CONTEXT_INDEP_OPS) - .set(RE_NO_BK_BRACES) - .set(RE_NO_BK_PARENS) - .set(RE_NO_BK_VBAR) - .set(RE_UNMATCHED_RIGHT_PAREN_ORD) - .makeFinal(); - - RE_SYNTAX_AWK = new RESyntax() - .set(RE_BACKSLASH_ESCAPE_IN_LISTS) - .set(RE_DOT_NOT_NULL) - .set(RE_NO_BK_PARENS) - .set(RE_NO_BK_REFS) - .set(RE_NO_BK_VBAR) - .set(RE_NO_EMPTY_RANGES) - .set(RE_UNMATCHED_RIGHT_PAREN_ORD) - .makeFinal(); - - RE_SYNTAX_POSIX_AWK = new RESyntax(RE_SYNTAX_POSIX_EXTENDED) - .set(RE_BACKSLASH_ESCAPE_IN_LISTS) - .makeFinal(); - - RE_SYNTAX_GREP = new RESyntax() - .set(RE_BK_PLUS_QM) - .set(RE_CHAR_CLASSES) - .set(RE_HAT_LISTS_NOT_NEWLINE) - .set(RE_INTERVALS) - .set(RE_NEWLINE_ALT) - .makeFinal(); - - RE_SYNTAX_EGREP = new RESyntax() - .set(RE_CHAR_CLASSES) - .set(RE_CONTEXT_INDEP_ANCHORS) - .set(RE_CONTEXT_INDEP_OPS) - .set(RE_HAT_LISTS_NOT_NEWLINE) - .set(RE_NEWLINE_ALT) - .set(RE_NO_BK_PARENS) - .set(RE_NO_BK_VBAR) - .makeFinal(); - - RE_SYNTAX_POSIX_EGREP = new RESyntax(RE_SYNTAX_EGREP) - .set(RE_INTERVALS) - .set(RE_NO_BK_BRACES) - .makeFinal(); - - /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ - - RE_SYNTAX_ED = new RESyntax(RE_SYNTAX_POSIX_BASIC) - .makeFinal(); - - RE_SYNTAX_SED = new RESyntax(RE_SYNTAX_POSIX_BASIC) - .makeFinal(); - - RE_SYNTAX_POSIX_MINIMAL_BASIC = new RESyntax(RE_SYNTAX_POSIX_COMMON) - .set(RE_LIMITED_OPS) - .makeFinal(); - - /* Differs from RE_SYNTAX_POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ - - RE_SYNTAX_POSIX_MINIMAL_EXTENDED = new RESyntax(RE_SYNTAX_POSIX_COMMON) - .set(RE_CONTEXT_INDEP_ANCHORS) - .set(RE_CONTEXT_INVALID_OPS) - .set(RE_NO_BK_BRACES) - .set(RE_NO_BK_PARENS) - .set(RE_NO_BK_REFS) - .set(RE_NO_BK_VBAR) - .set(RE_UNMATCHED_RIGHT_PAREN_ORD) - .makeFinal(); - - /* There is no official Perl spec, but here's a "best guess" */ - - RE_SYNTAX_PERL4 = new RESyntax() - .set(RE_BACKSLASH_ESCAPE_IN_LISTS) - .set(RE_CONTEXT_INDEP_ANCHORS) - .set(RE_CONTEXT_INDEP_OPS) // except for '{', apparently - .set(RE_INTERVALS) - .set(RE_NO_BK_BRACES) - .set(RE_NO_BK_PARENS) - .set(RE_NO_BK_VBAR) - .set(RE_NO_EMPTY_RANGES) - .set(RE_CHAR_CLASS_ESCAPES) // \d,\D,\w,\W,\s,\S - .makeFinal(); - - RE_SYNTAX_PERL4_S = new RESyntax(RE_SYNTAX_PERL4) - .set(RE_DOT_NEWLINE) - .makeFinal(); - - RE_SYNTAX_PERL5 = new RESyntax(RE_SYNTAX_PERL4) - .set(RE_PURE_GROUPING) // (?:) - .set(RE_STINGY_OPS) // *?,??,+?,{}? - .set(RE_LOOKAHEAD) // (?=)(?!) - .set(RE_STRING_ANCHORS) // \A,\Z - .set(RE_CHAR_CLASS_ESC_IN_LISTS)// \d,\D,\w,\W,\s,\S within [] - .set(RE_COMMENTS) // (?#) - .set(RE_EMBEDDED_FLAGS) // (?imsx-imsx) - .set(RE_OCTAL_CHAR) // \0377 - .set(RE_HEX_CHAR) // \x1b - .set(RE_NAMED_PROPERTY) // \p{prop}, \P{prop} - .makeFinal(); - - RE_SYNTAX_PERL5_S = new RESyntax(RE_SYNTAX_PERL5) - .set(RE_DOT_NEWLINE) - .makeFinal(); - - RE_SYNTAX_JAVA_1_4 = new RESyntax(RE_SYNTAX_PERL5) - // XXX - .set(RE_POSSESSIVE_OPS) // *+,?+,++,{}+ - .set(RE_UNICODE_CHAR) // \u1234 - .set(RE_NESTED_CHARCLASS) // [a-z&&[^p-r]] - .makeFinal(); + public static final RESyntax RE_SYNTAX_JAVA_1_4; + + static + { + // Define syntaxes + + RE_SYNTAX_EMACS = new RESyntax ().makeFinal (); + + RESyntax RE_SYNTAX_POSIX_COMMON = + new RESyntax ().set (RE_CHAR_CLASSES).set (RE_DOT_NEWLINE). + set (RE_DOT_NOT_NULL).set (RE_INTERVALS).set (RE_NO_EMPTY_RANGES). + makeFinal (); + + RE_SYNTAX_POSIX_BASIC = + new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_BK_PLUS_QM).makeFinal (); + + RE_SYNTAX_POSIX_EXTENDED = + new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_CONTEXT_INDEP_ANCHORS). + set (RE_CONTEXT_INDEP_OPS).set (RE_NO_BK_BRACES).set (RE_NO_BK_PARENS). + set (RE_NO_BK_VBAR).set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal (); + + RE_SYNTAX_AWK = + new RESyntax ().set (RE_BACKSLASH_ESCAPE_IN_LISTS). + set (RE_DOT_NOT_NULL).set (RE_NO_BK_PARENS).set (RE_NO_BK_REFS). + set (RE_NO_BK_VBAR).set (RE_NO_EMPTY_RANGES). + set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal (); + + RE_SYNTAX_POSIX_AWK = + new RESyntax (RE_SYNTAX_POSIX_EXTENDED). + set (RE_BACKSLASH_ESCAPE_IN_LISTS).makeFinal (); + + RE_SYNTAX_GREP = + new RESyntax ().set (RE_BK_PLUS_QM).set (RE_CHAR_CLASSES). + set (RE_HAT_LISTS_NOT_NEWLINE).set (RE_INTERVALS).set (RE_NEWLINE_ALT). + makeFinal (); + + RE_SYNTAX_EGREP = + new RESyntax ().set (RE_CHAR_CLASSES).set (RE_CONTEXT_INDEP_ANCHORS). + set (RE_CONTEXT_INDEP_OPS).set (RE_HAT_LISTS_NOT_NEWLINE). + set (RE_NEWLINE_ALT).set (RE_NO_BK_PARENS).set (RE_NO_BK_VBAR). + makeFinal (); + + RE_SYNTAX_POSIX_EGREP = + new RESyntax (RE_SYNTAX_EGREP).set (RE_INTERVALS).set (RE_NO_BK_BRACES). + makeFinal (); + + /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ + + RE_SYNTAX_ED = new RESyntax (RE_SYNTAX_POSIX_BASIC).makeFinal (); + + RE_SYNTAX_SED = new RESyntax (RE_SYNTAX_POSIX_BASIC).makeFinal (); + + RE_SYNTAX_POSIX_MINIMAL_BASIC = + new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_LIMITED_OPS).makeFinal (); + + /* Differs from RE_SYNTAX_POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS + replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ + + RE_SYNTAX_POSIX_MINIMAL_EXTENDED = + new RESyntax (RE_SYNTAX_POSIX_COMMON).set (RE_CONTEXT_INDEP_ANCHORS). + set (RE_CONTEXT_INVALID_OPS).set (RE_NO_BK_BRACES). + set (RE_NO_BK_PARENS).set (RE_NO_BK_REFS).set (RE_NO_BK_VBAR). + set (RE_UNMATCHED_RIGHT_PAREN_ORD).makeFinal (); + + /* There is no official Perl spec, but here's a "best guess" */ + + RE_SYNTAX_PERL4 = new RESyntax ().set (RE_BACKSLASH_ESCAPE_IN_LISTS).set (RE_CONTEXT_INDEP_ANCHORS).set (RE_CONTEXT_INDEP_OPS) // except for '{', apparently + .set (RE_INTERVALS).set (RE_NO_BK_BRACES).set (RE_NO_BK_PARENS).set (RE_NO_BK_VBAR).set (RE_NO_EMPTY_RANGES).set (RE_CHAR_CLASS_ESCAPES) // \d,\D,\w,\W,\s,\S + .makeFinal (); + + RE_SYNTAX_PERL4_S = + new RESyntax (RE_SYNTAX_PERL4).set (RE_DOT_NEWLINE).makeFinal (); + + RE_SYNTAX_PERL5 = new RESyntax (RE_SYNTAX_PERL4).set (RE_PURE_GROUPING) // (?:) + .set (RE_STINGY_OPS) // *?,??,+?,{}? + .set (RE_LOOKAHEAD) // (?=)(?!) + .set (RE_STRING_ANCHORS) // \A,\Z + .set (RE_CHAR_CLASS_ESC_IN_LISTS) // \d,\D,\w,\W,\s,\S within [] + .set (RE_COMMENTS) // (?#) + .set (RE_EMBEDDED_FLAGS) // (?imsx-imsx) + .set (RE_OCTAL_CHAR) // \0377 + .set (RE_HEX_CHAR) // \x1b + .set (RE_NAMED_PROPERTY) // \p{prop}, \P{prop} + .makeFinal (); + + RE_SYNTAX_PERL5_S = + new RESyntax (RE_SYNTAX_PERL5).set (RE_DOT_NEWLINE).makeFinal (); + + RE_SYNTAX_JAVA_1_4 = new RESyntax (RE_SYNTAX_PERL5) + // XXX + .set (RE_POSSESSIVE_OPS) // *+,?+,++,{}+ + .set (RE_UNICODE_CHAR) // \u1234 + .set (RE_NESTED_CHARCLASS) // [a-z&&[^p-r]] + .makeFinal (); } /** * Construct a new syntax object with all bits turned off. * This is equivalent to RE_SYNTAX_EMACS. */ - public RESyntax() { - bits = new BitSet(BIT_TOTAL); + public RESyntax () + { + bits = new BitSet (BIT_TOTAL); } /** @@ -484,24 +450,27 @@ public final class RESyntax implements Serializable { * * @return this object for convenient chaining */ - public RESyntax makeFinal() { - isFinal = true; - return this; - } + public RESyntax makeFinal () + { + isFinal = true; + return this; + } /** * Construct a new syntax object with all bits set the same * as the other syntax. */ - public RESyntax(RESyntax other) { - bits = (BitSet) other.bits.clone(); + public RESyntax (RESyntax other) + { + bits = (BitSet) other.bits.clone (); } /** * Check if a given bit is set in this syntax. */ - public boolean get(int index) { - return bits.get(index); + public boolean get (int index) + { + return bits.get (index); } /** @@ -510,10 +479,11 @@ public final class RESyntax implements Serializable { * @param index the constant (RESyntax.RE_xxx) bit to set. * @return a reference to this object for easy chaining. */ - public RESyntax set(int index) { + public RESyntax set (int index) + { if (isFinal) - throw new IllegalAccessError(RE.getLocalizedMessage("syntax.final")); - bits.set(index); + throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final")); + bits.set (index); return this; } @@ -523,11 +493,12 @@ public final class RESyntax implements Serializable { * @param index the constant (RESyntax.RE_xxx) bit to clear. * @return a reference to this object for easy chaining. */ - public RESyntax clear(int index) { - if (isFinal) - throw new IllegalAccessError(RE.getLocalizedMessage("syntax.final")); - bits.clear(index); - return this; + public RESyntax clear (int index) + { + if (isFinal) + throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final")); + bits.clear (index); + return this; } /** @@ -547,18 +518,20 @@ public final class RESyntax implements Serializable { * * @return this object for convenient chaining */ - public RESyntax setLineSeparator(String aSeparator) { - if (isFinal) - throw new IllegalAccessError(RE.getLocalizedMessage("syntax.final")); - lineSeparator = aSeparator; - return this; - } + public RESyntax setLineSeparator (String aSeparator) + { + if (isFinal) + throw new IllegalAccessError (RE.getLocalizedMessage ("syntax.final")); + lineSeparator = aSeparator; + return this; + } /** * Returns the currently active line separator string. The default * is the platform-dependent system property "line.separator". */ - public String getLineSeparator() { - return lineSeparator; - } + public String getLineSeparator () + { + return lineSeparator; + } } |