diff options
Diffstat (limited to 'libjava/gnu/xml/aelfred2/XmlParser.java')
| -rw-r--r-- | libjava/gnu/xml/aelfred2/XmlParser.java | 5113 | 
1 files changed, 5113 insertions, 0 deletions
| diff --git a/libjava/gnu/xml/aelfred2/XmlParser.java b/libjava/gnu/xml/aelfred2/XmlParser.java new file mode 100644 index 00000000000..f4abf222993 --- /dev/null +++ b/libjava/gnu/xml/aelfred2/XmlParser.java @@ -0,0 +1,5113 @@ +/* XmlParser.java --  +   Copyright (C) 1999,2000,2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING.  If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library.  Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module.  An independent module is a module which is not derived from +or based on this library.  If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so.  If you do not wish to do so, delete this +exception statement from your version. 
+ +Partly derived from code which carried the following notice: + +  Copyright (c) 1997, 1998 by Microstar Software Ltd. + +  AElfred is free for both commercial and non-commercial use and +  redistribution, provided that Microstar's copyright and disclaimer are +  retained intact.  You are free to modify AElfred for your own use and +  to redistribute AElfred with your modifications, provided that the +  modifications are clearly documented. + +  This program is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  merchantability or fitness for a particular purpose.  Please use it AT +  YOUR OWN RISK. +*/ + +package gnu.xml.aelfred2; + +import java.io.BufferedInputStream; +import java.io.CharConversionException; +import java.io.EOFException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.IOException; +import java.io.Reader; +import java.io.UnsupportedEncodingException; +import java.net.URL; +import java.net.URLConnection; + +// maintaining 1.1 compatibility for now ... +// Iterator and Hashmap ought to be faster +import java.util.Enumeration; +import java.util.Hashtable; +import java.util.Stack; + +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; + + +/** + * Parse XML documents and return parse events through call-backs. + * Use the <code>SAXDriver</code> class as your entry point, as all + * internal parser interfaces are subject to change. + * + * @author Written by David Megginson <dmeggins@microstar.com> + *	(version 1.2a with bugfixes) + * @author Updated by David Brownell <dbrownell@users.sourceforge.net> + * @see SAXDriver + */ +final class XmlParser +{ +    // avoid slow per-character readCh() +    private final static boolean USE_CHEATS = true; + + +    ////////////////////////////////////////////////////////////////////// +    // Constructors. 
+    //////////////////////////////////////////////////////////////////////// + + +    /** +     * Construct a new parser with no associated handler. +     * @see #setHandler +     * @see #parse +     */ +    // package private +    XmlParser () +    { +    } + + +    /** +     * Set the handler that will receive parsing events. +     * @param handler The handler to receive callback events. +     * @see #parse +     */ +    // package private +    void setHandler (SAXDriver handler) +    { +	this.handler = handler; +    } + + +    /** +     * Parse an XML document from the character stream, byte stream, or URI +     * that you provide (in that order of preference).  Any URI that you +     * supply will become the base URI for resolving relative URI, and may +     * be used to acquire a reader or byte stream. +     * +     * <p> Only one thread at a time may use this parser; since it is +     * private to this package, post-parse cleanup is done by the caller, +     * which MUST NOT REUSE the parser (just null it). +     * +     * @param systemId Absolute URI of the document; should never be null, +     *	but may be so iff a reader <em>or</em> a stream is provided. +     * @param publicId The public identifier of the document, or null. +     * @param reader A character stream; must be null if stream isn't. +     * @param stream A byte input stream; must be null if reader isn't. +     * @param encoding The suggested encoding, or null if unknown. 
+     * @exception java.lang.Exception Basically SAXException or IOException +     */ +    // package private  +    void doParse ( +	String		systemId, +	String		publicId, +	Reader		reader, +	InputStream	stream, +	String		encoding +    ) throws Exception +    { +	if (handler == null) +	    throw new IllegalStateException ("no callback handler"); + +	initializeVariables (); + +	// predeclare the built-in entities here (replacement texts) +	// we don't need to intern(), since we're guaranteed literals +	// are always (globally) interned. +	setInternalEntity ("amp", "&"); +	setInternalEntity ("lt", "<"); +	setInternalEntity ("gt", ">"); +	setInternalEntity ("apos", "'"); +	setInternalEntity ("quot", """); + +	try { +	    // pushURL first to ensure locator is correct in startDocument +	    // ... it might report an IO or encoding exception. +	    handler.startDocument (); +	    pushURL (false, "[document]", +			// default baseURI: null +		    new String [] { publicId, systemId, null}, +		    reader, stream, encoding, false); + +	    parseDocument (); +	} catch (EOFException e){ +	    //empty input +	    error("empty document, with no root element."); +	}finally { +	    if (reader != null) +		try { reader.close (); +		} catch (IOException e) { /* ignore */ } +	    if (stream != null) +		try { stream.close (); +		} catch (IOException e) { /* ignore */ } +	    if (is != null) +		try { is.close (); +		} catch (IOException e) { /* ignore */ } +	    if (reader != null) +		try { +		    reader.close (); +		} catch (IOException e) { /* ignore */ +		} +	    scratch = null; +	} +    } + + +    //////////////////////////////////////////////////////////////////////// +    // Constants. +    //////////////////////////////////////////////////////////////////////// + +    // +    // Constants for element content type. +    // + +    /** +     * Constant: an element has not been declared. 
* @see #getElementContentType
     */
    public static final int CONTENT_UNDECLARED = 0;

    /**
     * Content type code for an element declared with content model ANY.
     * @see #getElementContentType
     */
    public static final int CONTENT_ANY = 1;

    /**
     * Content type code for an element declared EMPTY.
     * @see #getElementContentType
     */
    public static final int CONTENT_EMPTY = 2;

    /**
     * Content type code for an element with mixed content.
     * @see #getElementContentType
     */
    public static final int CONTENT_MIXED = 3;

    /**
     * Content type code for an element with element-only content.
     * @see #getElementContentType
     */
    public static final int CONTENT_ELEMENTS = 4;


    //
    // Constants for the entity type.
    //

    /**
     * Entity type code: the entity has not been declared.
     * @see #getEntityType
     */
    public static final int ENTITY_UNDECLARED = 0;

    /**
     * Entity type code: the entity is internal.
     * @see #getEntityType
     */
    public static final int ENTITY_INTERNAL = 1;

    /**
     * Entity type code: the entity is external, non-parsable data.
     * @see #getEntityType
     */
    public static final int ENTITY_NDATA = 2;

    /**
     * Entity type code: the entity is external XML data.
     * @see #getEntityType
     */
    public static final int ENTITY_TEXT = 3;


    //
    // Attribute type constants are interned literal strings.
    //

    //
    // Constants for supported encodings.  "external" is just a flag.
+    // +    private final static int ENCODING_EXTERNAL = 0; +    private final static int ENCODING_UTF_8 = 1; +    private final static int ENCODING_ISO_8859_1 = 2; +    private final static int ENCODING_UCS_2_12 = 3; +    private final static int ENCODING_UCS_2_21 = 4; +    private final static int ENCODING_UCS_4_1234 = 5; +    private final static int ENCODING_UCS_4_4321 = 6; +    private final static int ENCODING_UCS_4_2143 = 7; +    private final static int ENCODING_UCS_4_3412 = 8; +    private final static int ENCODING_ASCII = 9; + + +    // +    // Constants for attribute default value. +    // + +    /** +     * Constant: the attribute is not declared. +     * @see #getAttributeDefaultValueType +     */ +    public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30; + +    /** +     * Constant: the attribute has a literal default value specified. +     * @see #getAttributeDefaultValueType +     * @see #getAttributeDefaultValue +     */ +    public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31; + +    /** +     * Constant: the attribute was declared #IMPLIED. +     * @see #getAttributeDefaultValueType +     */ +    public final static int ATTRIBUTE_DEFAULT_IMPLIED = 32; + +    /** +     * Constant: the attribute was declared #REQUIRED. +     * @see #getAttributeDefaultValueType +     */ +    public final static int ATTRIBUTE_DEFAULT_REQUIRED = 33; + +    /** +     * Constant: the attribute was declared #FIXED. +     * @see #getAttributeDefaultValueType +     * @see #getAttributeDefaultValue +     */ +    public final static int ATTRIBUTE_DEFAULT_FIXED = 34; + + +    // +    // Constants for input. +    // +    private final static int INPUT_NONE = 0; +    private final static int INPUT_INTERNAL = 1; +    private final static int INPUT_STREAM = 3; +    private final static int INPUT_READER = 5; + + +    // +    // Flags for reading literals. 
+    // +	// expand general entity refs (attribute values in dtd and content) +    private final static int LIT_ENTITY_REF = 2; +	// normalize this value (space chars) (attributes, public ids) +    private final static int LIT_NORMALIZE = 4; +	// literal is an attribute value  +    private final static int LIT_ATTRIBUTE = 8; +	// don't expand parameter entities +    private final static int LIT_DISABLE_PE = 16; +	// don't expand [or parse] character refs +    private final static int LIT_DISABLE_CREF = 32; +	// don't parse general entity refs +    private final static int LIT_DISABLE_EREF = 64; +	// literal is a public ID value  +    private final static int LIT_PUBID = 256; + + +    // +    // Flags affecting PE handling in DTDs (if expandPE is true). +    // PEs expand with space padding, except inside literals. +    // +    private final static int CONTEXT_NORMAL = 0; +    private final static int CONTEXT_LITERAL = 1; + + +    ////////////////////////////////////////////////////////////////////// +    // Error reporting. +    ////////////////////////////////////////////////////////////////////// + + +    /** +     * Report an error. +     * @param message The error message. +     * @param textFound The text that caused the error (or null). +     * @see SAXDriver#error +     * @see #line +     */ +    private void error (String message, String textFound, String textExpected) +    throws SAXException +    { +	if (textFound != null) { +	    message = message + " (found \"" + textFound + "\")"; +	} +	if (textExpected != null) { +	    message = message + " (expected \"" + textExpected + "\")"; +	} +	handler.fatal (message); + +	// "can't happen" +	throw new SAXException (message); +    } + + +    /** +     * Report a serious error. +     * @param message The error message. +     * @param textFound The text that caused the error (or null). 
+     */ +    private void error (String message, char textFound, String textExpected) +    throws SAXException +    { +	error (message, new Character (textFound).toString (), textExpected); +    } + +    /** Report typical case fatal errors. */ +    private void error (String message) +    throws SAXException +    { +	handler.fatal (message); +    } + + +    ////////////////////////////////////////////////////////////////////// +    // Major syntactic productions. +    ////////////////////////////////////////////////////////////////////// + + +    /** +     * Parse an XML document. +     * <pre> +     * [1] document ::= prolog element Misc* +     * </pre> +     * <p>This is the top-level parsing function for a single XML +     * document.  As a minimum, a well-formed document must have +     * a document element, and a valid document must have a prolog +     * (one with doctype) as well. +     */ +    private void parseDocument () +    throws Exception +    { +        try {                                       // added by MHK +    	    boolean sawDTD = parseProlog (); +    	    require ('<'); +    	    parseElement (!sawDTD); +        } catch (EOFException ee) {                 // added by MHK +            error("premature end of file", "[EOF]", null); +        } +         +    	try { +    	    parseMisc ();   //skip all white, PIs, and comments +    	    char c = readCh ();    //if this doesn't throw an exception... +    	    error ("unexpected characters after document end", c, null); +    	} catch (EOFException e) { +    	    return; +    	} +    } + +    static final char	startDelimComment [] = { '<', '!', '-', '-' }; +    static final char	endDelimComment [] = { '-', '-' }; + +    /** +     * Skip a comment. +     * <pre> +     * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* "-->" +     * </pre> +     * <p> (The <code><!--</code> has already been read.) 
+     */ +    private void parseComment () +    throws Exception +    { +	char c; +	boolean saved = expandPE; + +	expandPE = false; +	parseUntil (endDelimComment); +	require ('>'); +	expandPE = saved; +	handler.comment (dataBuffer, 0, dataBufferPos); +	dataBufferPos = 0; +    } + +    static final char	startDelimPI [] = { '<', '?' }; +    static final char	endDelimPI [] = { '?', '>' }; + +    /** +     * Parse a processing instruction and do a call-back. +     * <pre> +     * [16] PI ::= '<?' PITarget +     *		(S (Char* - (Char* '?>' Char*)))? +     *		'?>' +     * [17] PITarget ::= Name - ( ('X'|'x') ('M'|m') ('L'|l') ) +     * </pre> +     * <p> (The <code><?</code> has already been read.) +     */ +    private void parsePI () +    throws SAXException, IOException +    { +	String name; +	boolean saved = expandPE; + +	expandPE = false; +	name = readNmtoken (true); +	//NE08 +	if (name.indexOf(':') >= 0) +           error ("Illegal character(':') in processing instruction name ", name, null); +	if ("xml".equalsIgnoreCase (name)) +	    error ("Illegal processing instruction target", name, null); +	if (!tryRead (endDelimPI)) { +	    requireWhitespace (); +	    parseUntil (endDelimPI); +	} +	expandPE = saved; +	handler.processingInstruction (name, dataBufferToString ()); +    } + + +    static final char	endDelimCDATA [] = { ']', ']', '>' }; + +	private boolean isDirtyCurrentElement; + +    /** +     * Parse a CDATA section. +     * <pre> +     * [18] CDSect ::= CDStart CData CDEnd +     * [19] CDStart ::= '<![CDATA[' +     * [20] CData ::= (Char* - (Char* ']]>' Char*)) +     * [21] CDEnd ::= ']]>' +     * </pre> +     * <p> (The '<![CDATA[' has already been read.) +     */ +    private void parseCDSect () +    throws Exception +    { +	parseUntil (endDelimCDATA); +	dataBufferFlush (); +    } + + +    /** +     * Parse the prolog of an XML document. +     * <pre> +     * [22] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)? 
+     * </pre> +     * <p>We do not look for the XML declaration here, because it was +     * handled by pushURL (). +     * @see pushURL +     * @return true if a DTD was read. +     */ +    private boolean parseProlog () +    throws Exception +    { +	parseMisc (); + +	if (tryRead ("<!DOCTYPE")) { +	    parseDoctypedecl (); +	    parseMisc (); +	    return true; +	} +	return false; +    } + +    private void checkLegalVersion (String version) +    throws SAXException +    { +	int len = version.length (); +	for (int i = 0; i < len; i++) { +	    char c = version.charAt (i); +	    if ('0' <= c && c <= '9') +		continue; +	    if (c == '_' || c == '.' || c == ':' || c == '-') +		continue; +	    if ('a' <= c && c <= 'z') +		continue; +	    if ('A' <= c && c <= 'Z') +		continue; +	    error ("illegal character in version", version, "1.0"); +	} +    } + + +    /** +     * Parse the XML declaration. +     * <pre> +     * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' +     * [24] VersionInfo ::= S 'version' Eq +     *		("'" VersionNum "'" | '"' VersionNum '"' ) +     * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')* +     * [32] SDDecl ::= S 'standalone' Eq +     *		( "'"" ('yes' | 'no') "'"" | '"' ("yes" | "no") '"' ) +     * [80] EncodingDecl ::= S 'encoding' Eq +     *		( "'" EncName "'" | "'" EncName "'" ) +     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* +     * </pre> +     * <p> (The <code><?xml</code> and whitespace have already been read.) 
+     * @return the encoding in the declaration, uppercased; or null +     * @see #parseTextDecl +     * @see #setupDecoding +     */ +    private String parseXMLDecl (boolean ignoreEncoding) +    throws SAXException, IOException +    { +	String	version; +	String	encodingName = null; +	String	standalone = null; +	int	flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; +        String inputEncoding = null; +         +        switch (this.encoding) +          { +          case ENCODING_EXTERNAL: +          case ENCODING_UTF_8: +            inputEncoding = "UTF-8"; +            break; +          case ENCODING_ISO_8859_1: +            inputEncoding = "ISO-8859-1"; +            break; +          case ENCODING_UCS_2_12: +            inputEncoding = "UTF-16BE"; +            break; +          case ENCODING_UCS_2_21: +            inputEncoding = "UTF-16LE"; +            break; +          } + +	// Read the version. +	require ("version"); +	parseEq (); +	checkLegalVersion (version = readLiteral (flags)); +	if (!version.equals ("1.0")){ +	    if(version.equals ("1.1")){ +	    	handler.warn ("expected XML version 1.0, not: " + version); +	    	xmlVersion = XML_11; +	    }else { +	    	error("illegal XML version", version, "1.0 or 1.1"); +	    } +	} +	else +	    xmlVersion = XML_10; +	// Try reading an encoding declaration. 
+	boolean white = tryWhitespace (); + +	if (tryRead ("encoding")) { +	    if (!white) +		error ("whitespace required before 'encoding='"); +	    parseEq (); +	    encodingName = readLiteral (flags); +	    if (!ignoreEncoding) +		setupDecoding (encodingName); +	} + +	// Try reading a standalone declaration +	if (encodingName != null) +	    white = tryWhitespace (); +	if (tryRead ("standalone")) { +	    if (!white) +		error ("whitespace required before 'standalone='"); +	    parseEq (); +	    standalone = readLiteral (flags); +	    if ("yes".equals (standalone)) +		docIsStandalone = true; +	    else if (!"no".equals (standalone)) +		error ("standalone flag must be 'yes' or 'no'"); +	} + +	skipWhitespace (); +	require ("?>"); + +        if (inputEncoding == null) +          { +            inputEncoding = encodingName; +          } +        handler.xmlDecl(version, encodingName, "yes".equals(standalone), +                        inputEncoding); + +	return encodingName; +    } + + +    /** +     * Parse a text declaration. +     * <pre> +     * [79] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' +     * [80] EncodingDecl ::= S 'encoding' Eq +     *		( '"' EncName '"' | "'" EncName "'" ) +     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* +     * </pre> +     * <p> (The <code><?xml</code>' and whitespace have already been read.) +     * @return the encoding in the declaration, uppercased; or null +     * @see #parseXMLDecl +     * @see #setupDecoding +     */ +    private String parseTextDecl (boolean ignoreEncoding) +    throws SAXException, IOException +    { +	String	encodingName = null; +	int	flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; + +	// Read an optional version. 
if (tryRead ("version")) {
            String version;
            parseEq ();
            checkLegalVersion (version = readLiteral (flags));

            if (version.equals ("1.1")) {
                if (xmlVersion == XML_10) {
                    // the entity claims a later version than the
                    // document: what was found is the entity's version,
                    // what was expected is the document's "1.0"
                    error ("external subset has later version number.",
                           version, "1.0");
                }
                handler.warn ("expected XML version 1.0, not: " + version);
                xmlVersion = XML_11;
            } else if (!version.equals ("1.0")) {
                error ("illegal XML version", version, "1.0 or 1.1");
            }
            requireWhitespace ();
        }


        // Read the encoding.
        require ("encoding");
        parseEq ();
        encodingName = readLiteral (flags);
        if (!ignoreEncoding)
            setupDecoding (encodingName);

        skipWhitespace ();
        require ("?>");

        return encodingName;
    }


    /**
     * Sets up internal state so that we can decode an entity using the
     * specified encoding.  This is used when we start to read an entity
     * and we have been given knowledge of its encoding before we start to
     * read any data (e.g. from a SAX input source or from a MIME type).
     *
     * <p> It is also used after autodetection, at which point only very
     * limited adjustments to the encoding may be used (switching between
     * related builtin decoders).
     *
     * <p> Encoding names are matched case-insensitively; the comparison
     * uses a fixed locale so that the result does not depend on the
     * default locale of the JVM.
     *
     * @param encodingName The name of the encoding specified by the user.
     * @exception IOException if the encoding isn't supported either
     *  internally to this parser, or by the hosting JVM.
     * @exception SAXException if the name contradicts an autodetected
     *  16-bit or 32-bit encoding.
     * @see #parseXMLDecl
     * @see #parseTextDecl
     */
    private void setupDecoding (String encodingName)
    throws SAXException, IOException
    {
        // Locale-independent upper-casing: with the default locale,
        // names such as "iso-8859-1" would not match under e.g. a
        // Turkish locale, where 'i' upper-cases to a dotted capital I.
        encodingName = encodingName.toUpperCase (java.util.Locale.ENGLISH);

        // ENCODING_EXTERNAL indicates an encoding that wasn't
        // autodetected ... we can use builtin decoders, or
        // ones from the JVM (InputStreamReader).

        // Otherwise we can only tweak what was autodetected, and
        // only for single byte (ASCII derived) builtin encodings.

        // ASCII-derived encodings
        if (encoding == ENCODING_UTF_8 || encoding == ENCODING_EXTERNAL) {
            if (encodingName.equals ("ISO-8859-1")
                    || encodingName.equals ("8859_1")
                    || encodingName.equals ("ISO8859_1")) {
                encoding = ENCODING_ISO_8859_1;
                return;
            } else if (encodingName.equals ("US-ASCII")
                    || encodingName.equals ("ASCII")) {
                encoding = ENCODING_ASCII;
                return;
            } else if (encodingName.equals ("UTF-8")
                    || encodingName.equals ("UTF8")) {
                encoding = ENCODING_UTF_8;
                return;
            } else if (encoding != ENCODING_EXTERNAL) {
                // used to start with a new reader ...
                throw new UnsupportedEncodingException (encodingName);
            }
            // else fallthrough ...
            // it's ASCII-ish and something other than a builtin
        }

        // Unicode and such
        if (encoding == ENCODING_UCS_2_12 || encoding == ENCODING_UCS_2_21) {
            if (!(encodingName.equals ("ISO-10646-UCS-2")
                    || encodingName.equals ("UTF-16")
                    || encodingName.equals ("UTF-16BE")
                    || encodingName.equals ("UTF-16LE"))) {
                error ("unsupported Unicode encoding",
                       encodingName,
                       "UTF-16");
            }
            return;
        }

        // four byte encodings
        if (encoding == ENCODING_UCS_4_1234
                || encoding == ENCODING_UCS_4_4321
                || encoding == ENCODING_UCS_4_2143
                || encoding == ENCODING_UCS_4_3412) {
            // Strictly:  "UCS-4" == "UTF-32BE"; also, "UTF-32LE" exists
            if (!encodingName.equals ("ISO-10646-UCS-4")) {
                error ("unsupported 32-bit encoding",
                       encodingName,
                       "ISO-10646-UCS-4");
            }
            return;
        }

        // assert encoding == ENCODING_EXTERNAL
        // if (encoding != ENCODING_EXTERNAL)
        //     throw new RuntimeException ("encoding = " + encoding);

        if (encodingName.equals ("UTF-16BE")) {
            encoding = ENCODING_UCS_2_12;
            return;
        }
        if (encodingName.equals ("UTF-16LE")) {
            encoding = ENCODING_UCS_2_21;
            return;
        }

        // We couldn't use the builtin decoders at all.  But we can try to
        // create a reader, since we haven't messed up buffering.  Tweak
        // the encoding name if necessary.

        if (encodingName.equals ("UTF-16")
                || encodingName.equals ("ISO-10646-UCS-2")) {
            encodingName = "Unicode";
        }
        // Ignoring all the EBCDIC aliases here

        reader = new InputStreamReader (is, encodingName);
        sourceType = INPUT_READER;
    }


    /**
     * Parse miscellaneous markup outside the document element and DOCTYPE
     * declaration.
     * <pre>
     * [27] Misc ::= Comment | PI | S
     * </pre>
     * <p>Returns as soon as the next item is neither whitespace, a
     * processing instruction, nor a comment.
     */
    private void parseMisc ()
    throws Exception
    {
        while (true) {
            skipWhitespace ();
            if (tryRead (startDelimPI)) {
                parsePI ();
            } else if (tryRead (startDelimComment)) {
                parseComment ();
            } else {
                return;
            }
        }
    }


    /**
     * Parse a document type declaration.
     * <pre>
     * [28] doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S?
     *          ('[' (markupdecl | PEReference | S)* ']' S?)? '&gt;'
     * </pre>
     * <p> (The <code>&lt;!DOCTYPE</code> has already been read.)
     */
    private void parseDoctypedecl ()
    throws Exception
    {
        String rootName, ids[];

        // Read the document type name.
+	requireWhitespace (); +	rootName = readNmtoken (true); + +	// Read the External subset's IDs +	skipWhitespace (); +	ids = readExternalIds (false, true); + +	// report (a) declaration of name, (b) lexical info (ids) +	handler.doctypeDecl (rootName, ids [0], ids [1]); + +	// Internal subset is parsed first, if present +	skipWhitespace (); +	if (tryRead ('[')) { + +	    // loop until the subset ends +	    while (true) { +		doReport = expandPE = true; +		skipWhitespace (); +		doReport = expandPE = false; +		if (tryRead (']')) { +		    break; 		// end of subset +		} else { +		    // WFC, PEs in internal subset (only between decls) +		    peIsError = expandPE = true; +		    parseMarkupdecl (); +		    peIsError = expandPE = false; +		} +	    } +	} +	skipWhitespace (); +	require ('>'); + +	// Read the external subset, if any +	InputSource	subset; + +	if (ids [1] == null) +	    subset = handler.getExternalSubset (rootName, +	    		handler.getSystemId ()); +	else +	    subset = null; +	if (ids [1] != null || subset != null) { +	    pushString (null, ">"); + +	    // NOTE:  [dtd] is so we say what SAX2 expects, +	    // though it's misleading (subset, not entire dtd) +	    if (ids [1] != null) +		pushURL (true, "[dtd]", ids, null, null, null, true); +	    else { +		handler.warn ("modifying document by adding external subset"); +		pushURL (true, "[dtd]", +		    new String [] { subset.getPublicId (), +			    subset.getSystemId (), null }, +		    subset.getCharacterStream (), +		    subset.getByteStream (), +		    subset.getEncoding (), +		    false); +	    } + +	    // Loop until we end up back at '>' +	    while (true) { +		doReport = expandPE = true; +		skipWhitespace (); +		doReport = expandPE = false; +		if (tryRead ('>')) { +		    break; +		} else { +		    expandPE = true; +		    parseMarkupdecl (); +		    expandPE = false; +		} +	    } + +	    // the ">" string isn't popped yet +	    if (inputStack.size () != 1) +		error ("external subset has unmatched '>'"); +	} + +	// 
done dtd +	handler.endDoctype (); +	expandPE = false; +	doReport = true; +    } + + +    /** +     * Parse a markup declaration in the internal or external DTD subset. +     * <pre> +     * [29] markupdecl ::= elementdecl | Attlistdecl | EntityDecl +     *		| NotationDecl | PI | Comment +     * [30] extSubsetDecl ::= (markupdecl | conditionalSect +     *		| PEReference | S) * +     * </pre> +     * <p> Reading toplevel PE references is handled as a lexical issue +     * by the caller, as is whitespace. +     */ +    private void parseMarkupdecl () +    throws Exception +    { +	char	saved [] = null; +	boolean	savedPE = expandPE; + +	// prevent "<%foo;" and ensures saved entity is right +	require ('<'); +	unread ('<'); +	expandPE = false; + +	if (tryRead ("<!ELEMENT")) { +	    saved = readBuffer; +	    expandPE = savedPE; +	    parseElementDecl (); +	} else if (tryRead ("<!ATTLIST")) { +	    saved = readBuffer; +	    expandPE = savedPE; +	    parseAttlistDecl (); +	} else if (tryRead ("<!ENTITY")) { +	    saved = readBuffer; +	    expandPE = savedPE; +	    parseEntityDecl (); +	} else if (tryRead ("<!NOTATION")) { +	    saved = readBuffer; +	    expandPE = savedPE; +	    parseNotationDecl (); +	} else if (tryRead (startDelimPI)) { +	    saved = readBuffer; +	    expandPE = savedPE; +	    parsePI (); +	} else if (tryRead (startDelimComment)) { +	    saved = readBuffer; +	    expandPE = savedPE; +	    parseComment (); +	} else if (tryRead ("<![")) { +	    saved = readBuffer; +	    expandPE = savedPE; +	    if (inputStack.size () > 0) +		parseConditionalSect (saved); +	    else +		error ("conditional sections illegal in internal subset"); +	} else { +	    error ("expected markup declaration"); +	} + +	// VC: Proper Decl/PE Nesting +	if (readBuffer != saved) +	    handler.verror ("Illegal Declaration/PE nesting"); +    } + + +    /** +     * Parse an element, with its tags. 
+     * <pre> +     * [39] element ::= EmptyElementTag | STag content ETag +     * [40] STag ::= '<' Name (S Attribute)* S? '>' +     * [44] EmptyElementTag ::= '<' Name (S Attribute)* S? '/>' +     * </pre> +     * <p> (The '<' has already been read.) +     * <p>NOTE: this method actually chains onto parseContent (), if necessary, +     * and parseContent () will take care of calling parseETag (). +     */ +    private void parseElement (boolean maybeGetSubset) +    throws Exception +    { +	String	gi; +	char	c; +	int	oldElementContent = currentElementContent; +	String	oldElement = currentElement; +	Object	element []; + +	// This is the (global) counter for the +	// array of specified attributes. +	tagAttributePos = 0; + +	// Read the element type name. +	gi = readNmtoken (true); + +	// If we saw no DTD, and this is the document root element, +	// let the application modify the input stream by providing one. +	if (maybeGetSubset) { +	    InputSource	subset = handler.getExternalSubset (gi, +	    		handler.getSystemId ()); +	    if (subset != null) { +		String	publicId = subset.getPublicId (); +		String	systemId = subset.getSystemId (); + +		handler.warn ("modifying document by adding DTD"); +		handler.doctypeDecl (gi, publicId, systemId); +		pushString (null, ">"); + +		// NOTE:  [dtd] is so we say what SAX2 expects, +		// though it's misleading (subset, not entire dtd) +		pushURL (true, "[dtd]", +		    new String [] { publicId, systemId, null }, +		    subset.getCharacterStream (), +		    subset.getByteStream (), +		    subset.getEncoding (), +		    false); + +		// Loop until we end up back at '>' +		while (true) { +		    doReport = expandPE = true; +		    skipWhitespace (); +		    doReport = expandPE = false; +		    if (tryRead ('>')) { +			break; +		    } else { +			expandPE = true; +			parseMarkupdecl (); +			expandPE = false; +		    } +		} + +		// the ">" string isn't popped yet +		if (inputStack.size () != 1) +		    error ("external subset has unmatched 
'>'"); + +		handler.endDoctype (); +	    } +	} + +	// Determine the current content type. +	currentElement = gi; +	element = (Object []) elementInfo.get (gi); +	currentElementContent = getContentType (element, CONTENT_ANY); + +	// Read the attributes, if any. +	// After this loop, "c" is the closing delimiter. +	boolean white = tryWhitespace (); +	c = readCh (); +	while (c != '/' && c != '>') { +	    unread (c); +	    if (!white) +		error ("need whitespace between attributes"); +	    parseAttribute (gi); +	    white = tryWhitespace (); +	    c = readCh (); +	} + +	// Supply any defaulted attributes. +	Enumeration atts = declaredAttributes (element); +	if (atts != null) { +	    String aname; +loop: +	    while (atts.hasMoreElements ()) { +		aname = (String) atts.nextElement (); +		// See if it was specified. +		for (int i = 0; i < tagAttributePos; i++) { +		    if (tagAttributes [i] == aname) { +			continue loop; +		    } +		} +		// ... or has a default +		String value = getAttributeDefaultValue (gi, aname); + +		if (value == null) +		    continue; +		handler.attribute (aname, value, false); +	    } +	} + +	// Figure out if this is a start tag +	// or an empty element, and dispatch an +	// event accordingly. +	switch (c) { +	case '>': +	    handler.startElement (gi); +	    parseContent (); +	    break; +	case '/': +	    require ('>'); +	    handler.startElement (gi); +	    handler.endElement (gi); +	    break; +	} + +	// Restore the previous state. +	currentElement = oldElement; +	currentElementContent = oldElementContent; +    } + + +    /** +     * Parse an attribute assignment. +     * <pre> +     * [41] Attribute ::= Name Eq AttValue +     * </pre> +     * @param name The name of the attribute's element. +     * @see SAXDriver#attribute +     */ +    private void parseAttribute (String name) +    throws Exception +    { +	String aname; +	String type; +	String value; +	int flags = LIT_ATTRIBUTE |  LIT_ENTITY_REF; + +	// Read the attribute name. 
+	aname = readNmtoken (true); +	type = getAttributeType (name, aname); + +	// Parse '=' +	parseEq (); + +	// Read the value, normalizing whitespace +	// unless it is CDATA. +  if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { +    if (type == "CDATA" || type == null) { +	    value = readLiteral (flags); +    } else { +	    value = readLiteral (flags | LIT_NORMALIZE); +    } +  } else { +    if (type.equals("CDATA") || type == null) { +	    value = readLiteral (flags); +    } else { +	    value = readLiteral (flags | LIT_NORMALIZE); +    } +  } + +	// WFC: no duplicate attributes +	for (int i = 0; i < tagAttributePos; i++) +	    if (aname.equals (tagAttributes [i])) +		error ("duplicate attribute", aname, null); + +	// Inform the handler about the +	// attribute. +	handler.attribute (aname, value, true); +	dataBufferPos = 0; + +	// Note that the attribute has been +	// specified. +	if (tagAttributePos == tagAttributes.length) { +	    String newAttrib[] = new String [tagAttributes.length * 2]; +	    System.arraycopy (tagAttributes, 0, newAttrib, 0, tagAttributePos); +	    tagAttributes = newAttrib; +	} +	tagAttributes [tagAttributePos++] = aname; +    } + + +    /** +     * Parse an equals sign surrounded by optional whitespace. +     * <pre> +     * [25] Eq ::= S? '=' S? +     * </pre> +     */ +    private void parseEq () +    throws SAXException, IOException +    { +	skipWhitespace (); +	require ('='); +	skipWhitespace (); +    } + + +    /** +     * Parse an end tag. +     * <pre> +     * [42] ETag ::= '</' Name S? '>' +     * </pre> +     * <p>NOTE: parseContent () chains to here, we already read the +     * "</". +     */ +    private void parseETag () +    throws Exception +    { +	require (currentElement); +	skipWhitespace (); +	require ('>'); +	handler.endElement (currentElement); +	// not re-reporting any SAXException re bogus end tags, +	// even though that diagnostic might be clearer ... 
+    } + + +    /** +     * Parse the content of an element. +     * <pre> +     * [43] content ::= (element | CharData | Reference +     *		| CDSect | PI | Comment)* +     * [67] Reference ::= EntityRef | CharRef +     * </pre> +     * <p> NOTE: consumes ETtag. +     */ +    private void parseContent () +    throws Exception +    { +	char c; + +	while (true) { +	    // consume characters (or ignorable whitspace) until delimiter +	    parseCharData (); + +	    // Handle delimiters +	    c = readCh (); +	    switch (c) { + +	    case '&': 			// Found "&" +		c = readCh (); +		if (c == '#') { +		    parseCharRef (); +		} else { +		    unread (c); +		    parseEntityRef (true); +		} +		isDirtyCurrentElement = true; +		break; + +	      case '<': 			// Found "<" +		dataBufferFlush (); +		c = readCh (); +		switch (c) { +		  case '!': 			// Found "<!" +		    c = readCh (); +		    switch (c) { +		      case '-': 		// Found "<!-" +			require ('-'); +			isDirtyCurrentElement = false; +			parseComment (); +			break; +		      case '[': 		// Found "<![" +		      	isDirtyCurrentElement = false; +			require ("CDATA["); +			handler.startCDATA (); +			inCDATA = true; +			parseCDSect (); +			inCDATA = false; +			handler.endCDATA (); +			break; +		      default: +			error ("expected comment or CDATA section", c, null); +	                break; +		    } +		    break; + +		  case '?': 		// Found "<?" +		    isDirtyCurrentElement = false; +		    parsePI (); +		    break; + +		  case '/': 		// Found "</" +		    isDirtyCurrentElement = false; +		    parseETag (); +		    return; + +		  default: 		// Found "<" followed by something else +		    isDirtyCurrentElement = false; +		    unread (c); +		    parseElement (false); +		    break; +		} +	    } +	} +	 +    } + + +    /** +     * Parse an element type declaration. +     * <pre> +     * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' +     * </pre> +     * <p> NOTE: the '<!ELEMENT' has already been read. 
+     */ +    private void parseElementDecl () +    throws Exception +    { +	String name; + +	requireWhitespace (); +	// Read the element type name. +	name = readNmtoken (true); + +	requireWhitespace (); +	// Read the content model. +	parseContentspec (name); + +	skipWhitespace (); +	require ('>'); +    } + + +    /** +     * Content specification. +     * <pre> +     * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements +     * </pre> +     */ +    private void parseContentspec (String name) +    throws Exception +    { +// FIXME: move elementDecl() into setElement(), pass EMTPY/ANY ... +	if (tryRead ("EMPTY")) { +	    setElement (name, CONTENT_EMPTY, null, null); +	    if (!skippedPE) +		handler.getDeclHandler ().elementDecl (name, "EMPTY"); +	    return; +	} else if (tryRead ("ANY")) { +	    setElement (name, CONTENT_ANY, null, null); +	    if (!skippedPE) +		handler.getDeclHandler ().elementDecl (name, "ANY"); +	    return; +	} else { +	    String	model; +	    char	saved [];  + +	    require ('('); +	    saved = readBuffer; +	    dataBufferAppend ('('); +	    skipWhitespace (); +	    if (tryRead ("#PCDATA")) { +		dataBufferAppend ("#PCDATA"); +		parseMixed (saved); +		model = dataBufferToString (); +		setElement (name, CONTENT_MIXED, model, null); +	    } else { +		parseElements (saved); +		model = dataBufferToString (); +		setElement (name, CONTENT_ELEMENTS, model, null); +	    } +	    if (!skippedPE) +		handler.getDeclHandler ().elementDecl (name, model); +	} +    } + +    /** +     * Parse an element-content model. +     * <pre> +     * [47] elements ::= (choice | seq) ('?' | '*' | '+')? +     * [49] choice ::= '(' S? cp (S? '|' S? cp)+ S? ')' +     * [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')' +     * </pre> +     * +     * <p> NOTE: the opening '(' and S have already been read. 
+     * +     * @param saved Buffer for entity that should have the terminal ')' +     */ +    private void parseElements (char saved []) +    throws Exception +    { +	char c; +	char sep; + +	// Parse the first content particle +	skipWhitespace (); +	parseCp (); + +	// Check for end or for a separator. +	skipWhitespace (); +	c = readCh (); +	switch (c) { +	case ')': +	    // VC: Proper Group/PE Nesting +	    if (readBuffer != saved) +		handler.verror ("Illegal Group/PE nesting"); + +	    dataBufferAppend (')'); +	    c = readCh (); +	    switch (c) { +	    case '*': +	    case '+': +	    case '?': +		dataBufferAppend (c); +		break; +	    default: +		unread (c); +	    } +	    return; +	case ',': 			// Register the separator. +	case '|': +	    sep = c; +	    dataBufferAppend (c); +	    break; +	default: +	    error ("bad separator in content model", c, null); +	    return; +	} + +	// Parse the rest of the content model. +	while (true) { +	    skipWhitespace (); +	    parseCp (); +	    skipWhitespace (); +	    c = readCh (); +	    if (c == ')') { +		// VC: Proper Group/PE Nesting +		if (readBuffer != saved) +		    handler.verror ("Illegal Group/PE nesting"); + +		dataBufferAppend (')'); +		break; +	    } else if (c != sep) { +		error ("bad separator in content model", c, null); +		return; +	    } else { +		dataBufferAppend (c); +	    } +	} + +	// Check for the occurrence indicator. +	c = readCh (); +	switch (c) { +	case '?': +	case '*': +	case '+': +	    dataBufferAppend (c); +	    return; +	default: +	    unread (c); +	    return; +	} +    } + + +    /** +     * Parse a content particle. +     * <pre> +     * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? 
+     * </pre> +     */ +    private void parseCp () +    throws Exception +    { +	if (tryRead ('(')) { +	    dataBufferAppend ('('); +	    parseElements (readBuffer); +	} else { +	    dataBufferAppend (readNmtoken (true)); +	    char c = readCh (); +	    switch (c) { +	    case '?': +	    case '*': +	    case '+': +		dataBufferAppend (c); +		break; +	    default: +		unread (c); +		break; +	    } +	} +    } + + +    /** +     * Parse mixed content. +     * <pre> +     * [51] Mixed ::= '(' S? ( '#PCDATA' (S? '|' S? Name)*) S? ')*' +     *	      | '(' S? ('#PCDATA') S? ')' +     * </pre> +     * +     * @param saved Buffer for entity that should have the terminal ')' +     */ +    private void parseMixed (char saved []) +    throws Exception +    { +	// Check for PCDATA alone. +	skipWhitespace (); +	if (tryRead (')')) { +	    // VC: Proper Group/PE Nesting +	    if (readBuffer != saved) +		handler.verror ("Illegal Group/PE nesting"); + +	    dataBufferAppend (")*"); +	    tryRead ('*'); +	    return; +	} + +	// Parse mixed content. +	skipWhitespace (); +	while (!tryRead (")")) { +	    require ('|'); +	    dataBufferAppend ('|'); +	    skipWhitespace (); +	    dataBufferAppend (readNmtoken (true)); +	    skipWhitespace (); +	} + +	// VC: Proper Group/PE Nesting +	if (readBuffer != saved) +	    handler.verror ("Illegal Group/PE nesting"); + +	require ('*'); +	dataBufferAppend (")*"); +    } + + +    /** +     * Parse an attribute list declaration. +     * <pre> +     * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' +     * </pre> +     * <p>NOTE: the '<!ATTLIST' has already been read. 
+     */ +    private void parseAttlistDecl () +    throws Exception +    { +	String elementName; + +	requireWhitespace (); +	elementName = readNmtoken (true); +	boolean white = tryWhitespace (); +	while (!tryRead ('>')) { +	    if (!white) +		error ("whitespace required before attribute definition"); +	    parseAttDef (elementName); +	    white = tryWhitespace (); +	} +    } + + +    /** +     * Parse a single attribute definition. +     * <pre> +     * [53] AttDef ::= S Name S AttType S DefaultDecl +     * </pre> +     */ +    private void parseAttDef (String elementName) +    throws Exception +    { +	String name; +	String type; +	String enumer = null; + +	// Read the attribute name. +	name = readNmtoken (true); + +	// Read the attribute type. +	requireWhitespace (); +	type = readAttType (); + +	// Get the string of enumerated values if necessary. +  if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { +    if ("ENUMERATION" == type || "NOTATION" == type) +	    enumer = dataBufferToString (); +  } else { +    if ("ENUMERATION".equals(type) || "NOTATION".equals(type)) +	    enumer = dataBufferToString (); +  } + +	// Read the default value. +	requireWhitespace (); +	parseDefault (elementName, name, type, enumer); +    } + + +  /** +   * Parse the attribute type. 
+   * <pre> +   * [54] AttType ::= StringType | TokenizedType | EnumeratedType +   * [55] StringType ::= 'CDATA' +   * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' +   *		| 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' +   * [57] EnumeratedType ::= NotationType | Enumeration +   * </pre> +   */ +  private String readAttType () +    throws Exception +  { +    if (tryRead ('(')) { +	    parseEnumeration (false); +	    return "ENUMERATION"; +    } else { +	    String typeString = readNmtoken (true); +      if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { +        if ("NOTATION" == typeString) { +          parseNotationType (); +          return typeString; +        } else if ("CDATA" == typeString +                   || "ID" == typeString +                   || "IDREF" == typeString +                   || "IDREFS" == typeString +                   || "ENTITY" == typeString +                   || "ENTITIES" == typeString +                   || "NMTOKEN" == typeString +                   || "NMTOKENS" == typeString) +          return typeString; +      } else { +        if ("NOTATION".equals(typeString)) { +          parseNotationType (); +          return typeString; +        } else if ("CDATA".equals(typeString) +                   || "ID".equals(typeString) +                   || "IDREF".equals(typeString) +                   || "IDREFS".equals(typeString) +                   || "ENTITY".equals(typeString) +                   || "ENTITIES".equals(typeString) +                   || "NMTOKEN".equals(typeString) +                   || "NMTOKENS".equals(typeString)) +          return typeString; +      } +	    error ("illegal attribute type", typeString, null); +	    return null; +    } +  } +   + +    /** +     * Parse an enumeration. +     * <pre> +     * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' +     * </pre> +     * <p>NOTE: the '(' has already been read. 
+     */ +    private void parseEnumeration (boolean isNames) +    throws Exception +    { +	dataBufferAppend ('('); + +	// Read the first token. +	skipWhitespace (); +	dataBufferAppend (readNmtoken (isNames)); +	// Read the remaining tokens. +	skipWhitespace (); +	while (!tryRead (')')) { +	    require ('|'); +	    dataBufferAppend ('|'); +	    skipWhitespace (); +	    dataBufferAppend (readNmtoken (isNames)); +	    skipWhitespace (); +	} +	dataBufferAppend (')'); +    } + + +    /** +     * Parse a notation type for an attribute. +     * <pre> +     * [58] NotationType ::= 'NOTATION' S '(' S? NameNtoks +     *		(S? '|' S? name)* S? ')' +     * </pre> +     * <p>NOTE: the 'NOTATION' has already been read +     */ +    private void parseNotationType () +    throws Exception +    { +	requireWhitespace (); +	require ('('); + +	parseEnumeration (true); +    } + + +    /** +     * Parse the default value for an attribute. +     * <pre> +     * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' +     *		| (('#FIXED' S)? AttValue) +     * </pre> +     */ +    private void parseDefault ( +	String elementName, +	String name, +	String type, +	String enumer +    ) throws Exception +    { +	int	valueType = ATTRIBUTE_DEFAULT_SPECIFIED; +	String	value = null; +	int	flags = LIT_ATTRIBUTE; +	boolean	saved = expandPE; +	String	defaultType = null; + +	// LIT_ATTRIBUTE forces '<' checks now (ASAP) and turns whitespace +	// chars to spaces (doesn't matter when that's done if it doesn't +	// interfere with char refs expanding to whitespace). 
+ +	if (!skippedPE) { +    flags |= LIT_ENTITY_REF; +    if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { +	    if ("CDATA" != type) +        flags |= LIT_NORMALIZE; +    } else { +	    if (!"CDATA".equals(type)) +        flags |= LIT_NORMALIZE; +    } +	} + +	expandPE = false; +	if (tryRead ('#')) { +	    if (tryRead ("FIXED")) { +		defaultType = "#FIXED"; +		valueType = ATTRIBUTE_DEFAULT_FIXED; +		requireWhitespace (); +		value = readLiteral (flags); +	    } else if (tryRead ("REQUIRED")) { +		defaultType = "#REQUIRED"; +		valueType = ATTRIBUTE_DEFAULT_REQUIRED; +	    } else if (tryRead ("IMPLIED")) { +		defaultType = "#IMPLIED"; +		valueType = ATTRIBUTE_DEFAULT_IMPLIED; +	    } else { +		error ("illegal keyword for attribute default value"); +	    } +	} else +	    value = readLiteral (flags); +	expandPE = saved; +	setAttribute (elementName, name, type, enumer, value, valueType); +  if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { +    if ("ENUMERATION" == type) +	    type = enumer; +    else if ("NOTATION" == type) +	    type = "NOTATION " + enumer; +  } else { +    if ("ENUMERATION".equals(type)) +	    type = enumer; +    else if ("NOTATION".equals(type)) +	    type = "NOTATION " + enumer; +  } +	if (!skippedPE) handler.getDeclHandler () +	    .attributeDecl (elementName, name, type, defaultType, value); +    } + + +    /** +     * Parse a conditional section. +     * <pre> +     * [61] conditionalSect ::= includeSect || ignoreSect +     * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' +     *		extSubsetDecl ']]>' +     * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' +     *		ignoreSectContents* ']]>' +     * [64] ignoreSectContents ::= Ignore +     *		('<![' ignoreSectContents* ']]>' Ignore )* +     * [65] Ignore ::= Char* - (Char* ( '<![' | ']]>') Char* ) +     * </pre> +     * <p> NOTE: the '>![' has already been read. 
+     */ +    private void parseConditionalSect (char saved []) +    throws Exception +    { +	skipWhitespace (); +	if (tryRead ("INCLUDE")) { +	    skipWhitespace (); +	    require ('['); +	    // VC: Proper Conditional Section/PE Nesting +	    if (readBuffer != saved) +		handler.verror ("Illegal Conditional Section/PE nesting"); +	    skipWhitespace (); +	    while (!tryRead ("]]>")) { +		parseMarkupdecl (); +		skipWhitespace (); +	    } +	} else if (tryRead ("IGNORE")) { +	    skipWhitespace (); +	    require ('['); +	    // VC: Proper Conditional Section/PE Nesting +	    if (readBuffer != saved) +		handler.verror ("Illegal Conditional Section/PE nesting"); +	    int nesting = 1; +	    char c; +	    expandPE = false; +	    for (int nest = 1; nest > 0;) { +		c = readCh (); +		switch (c) { +		case '<': +		    if (tryRead ("![")) { +			nest++; +		    } +		case ']': +		    if (tryRead ("]>")) { +			nest--; +		    } +		} +	    } +	    expandPE = true; +	} else { +	    error ("conditional section must begin with INCLUDE or IGNORE"); +	} +    } + +  private void parseCharRef () +    throws SAXException, IOException +  { +    parseCharRef (true /* do flushDataBuffer by default */); +  } + +  /** +   * Try to read a character reference without consuming data from buffer. +   * <pre> +   * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' +   * </pre> +   * <p>NOTE: the '&#' has already been read. 
+   */ +  private void tryReadCharRef () +  throws SAXException, IOException +  { +  	int value = 0; +	char c; + +	if (tryRead ('x')) { +loop1: +	    while (true) { +		c = readCh (); +		int n; +		switch (c) { +		case '0': case '1': case '2': case '3': case '4': +		case '5': case '6': case '7': case '8': case '9': +		    n = c - '0'; +		    break; +		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': +		    n = (c - 'a') + 10; +		    break; +		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': +		    n = (c - 'A') + 10; +		    break; +		case ';': +		    break loop1; +		default: +		    error ("illegal character in character reference", c, null); +		    break loop1; +		} +		value *= 16; +		value += n; +	    } +	} else { +loop2: +	    while (true) { +		c = readCh (); +		switch (c) { +		case '0': case '1': case '2': case '3': case '4': +		case '5': case '6': case '7': case '8': case '9': +		    value *= 10; +		    value += c - '0'; +		    break; +		case ';': +		    break loop2; +		default: +		    error ("illegal character in character reference", c, null); +		    break loop2; +		} +	    } +	} + +	// check for character refs being legal XML +	if ((value < 0x0020 +		&& ! (value == '\n' || value == '\t' || value == '\r')) +		|| (value >= 0xD800 && value <= 0xDFFF) +		|| value == 0xFFFE || value == 0xFFFF +		|| value > 0x0010ffff) +	    error ("illegal XML character reference U+" +		    + Integer.toHexString (value)); + +	// Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz +	//  (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz: +	if (value > 0x0010ffff) { +	    // too big for surrogate +	    error ("character reference " + value + " is too large for UTF-16", +		   new Integer (value).toString (), null); +	} + +  } +   +    /** +     * Read and interpret a character reference. +     * <pre> +     * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' +     * </pre> +     * <p>NOTE: the '&#' has already been read. 
+     */ +    private void parseCharRef (boolean doFlush) +    throws SAXException, IOException +    { +	int value = 0; +	char c; + +	if (tryRead ('x')) { +loop1: +	    while (true) { +		c = readCh (); +		int n; +		switch (c) { +		case '0': case '1': case '2': case '3': case '4': +		case '5': case '6': case '7': case '8': case '9': +		    n = c - '0'; +		    break; +		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': +		    n = (c - 'a') + 10; +		    break; +		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': +		    n = (c - 'A') + 10; +		    break; +		case ';': +		    break loop1; +		default: +		    error ("illegal character in character reference", c, null); +		    break loop1; +		} +		value *= 16; +		value += n; +	    } +	} else { +loop2: +	    while (true) { +		c = readCh (); +		switch (c) { +		case '0': case '1': case '2': case '3': case '4': +		case '5': case '6': case '7': case '8': case '9': +		    value *= 10; +		    value += c - '0'; +		    break; +		case ';': +		    break loop2; +		default: +		    error ("illegal character in character reference", c, null); +		    break loop2; +		} +	    } +	} + +	// check for character refs being legal XML +	if ((value < 0x0020 +		&& ! 
(value == '\n' || value == '\t' || value == '\r')) +		|| (value >= 0xD800 && value <= 0xDFFF) +		|| value == 0xFFFE || value == 0xFFFF +		|| value > 0x0010ffff) +	    error ("illegal XML character reference U+" +		    + Integer.toHexString (value)); + +	// Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz +	//  (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz: +	if (value <= 0x0000ffff) { +	    // no surrogates needed +	    dataBufferAppend ((char) value); +	} else if (value <= 0x0010ffff) { +	    value -= 0x10000; +	    // > 16 bits, surrogate needed +	    dataBufferAppend ((char) (0xd800 | (value >> 10))); +	    dataBufferAppend ((char) (0xdc00 | (value & 0x0003ff))); +	} else { +	    // too big for surrogate +	    error ("character reference " + value + " is too large for UTF-16", +		   new Integer (value).toString (), null); +	} +  if (doFlush) dataBufferFlush (); +    } + + +    /** +     * Parse and expand an entity reference. +     * <pre> +     * [68] EntityRef ::= '&' Name ';' +     * </pre> +     * <p>NOTE: the '&' has already been read. +     * @param externalAllowed External entities are allowed here. +     */ +    private void parseEntityRef (boolean externalAllowed) +    throws SAXException, IOException +    { +	String name; + +	name = readNmtoken (true); +	require (';'); +	switch (getEntityType (name)) { +	case ENTITY_UNDECLARED: +	    // NOTE:  XML REC describes amazingly convoluted handling for +	    // this case.  Nothing as meaningful as being a WFness error +	    // unless the processor might _legitimately_ not have seen a +	    // declaration ... which is what this implements. +	    String	message; +	     +	    message = "reference to undeclared general entity " + name; +	    if (skippedPE && !docIsStandalone) { +		handler.verror (message); +		// we don't know this entity, and it might be external... 
+		if (externalAllowed) +		    handler.skippedEntity (name); +	    } else +		error (message); +	    break; +	case ENTITY_INTERNAL: +            pushString (name, getEntityValue (name)); +	     +	    //workaround for possible input pop before marking +            //the buffer reading position	 +            char t = readCh (); +            unread (t); +            int bufferPosMark = readBufferPos; +            +            int end = readBufferPos + getEntityValue (name).length(); +            for(int k = readBufferPos; k < end; k++){ +	            t = readCh (); +	            if (t == '&'){ +	            	t = readCh ();    +	            	if (t  == '#'){  +	            	   //try to match a character ref +	                   tryReadCharRef (); +	                    +	                   //everything has been read +	                   if (readBufferPos >= end) +	                      break; +	                   k = readBufferPos; +	                   continue; +	                } +	                else if (Character.isLetter(t)){ +	            	   //looks like an entity ref +	            	   unread (t); +	            	   readNmtoken (true); +	        	   require (';'); +	        	 +	        	   //everything has been read +	        	   if (readBufferPos >= end) +		              break; +		           k = readBufferPos; +	                   continue; +	                } +	                error(" malformed entity reference"); +	            } +	            +            } +            readBufferPos = bufferPosMark; +	    break; +	case ENTITY_TEXT: +	    if (externalAllowed) { +		pushURL (false, name, getEntityIds (name), +			null, null, null, true); +	    } else { +		error ("reference to external entity in attribute value.", +			name, null); +	    } +	    break; +	case ENTITY_NDATA: +	    if (externalAllowed) { +		error ("unparsed entity reference in content", name, null); +	    } else { +		error ("reference to external entity in attribute value.", +			name, null); +	    } +	  
  break; +	default: +	    throw new RuntimeException (); +	} +    } + + +    /** +     * Parse and expand a parameter entity reference. +     * <pre> +     * [69] PEReference ::= '%' Name ';' +     * </pre> +     * <p>NOTE: the '%' has already been read. +     */ +    private void parsePEReference () +    throws SAXException, IOException +    { +	String name; + +	name = "%" + readNmtoken (true); +	require (';'); +	switch (getEntityType (name)) { +	case ENTITY_UNDECLARED: +	    // VC: Entity Declared +	    handler.verror ("reference to undeclared parameter entity " + name); + +	    // we should disable handling of all subsequent declarations +	    // unless this is a standalone document (info discarded) +	    break; +	case ENTITY_INTERNAL: +	    if (inLiteral) +		pushString (name, getEntityValue (name)); +	    else +		pushString (name, ' ' + getEntityValue (name) + ' '); +	    break; +	case ENTITY_TEXT: +	    if (!inLiteral) +		pushString (null, " "); +	    pushURL (true, name, getEntityIds (name), null, null, null, true); +	    if (!inLiteral) +		pushString (null, " "); +	    break; +	} +    } + +    /** +     * Parse an entity declaration. +     * <pre> +     * [70] EntityDecl ::= GEDecl | PEDecl +     * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' +     * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' +     * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) +     * [74] PEDef ::= EntityValue | ExternalID +     * [75] ExternalID ::= 'SYSTEM' S SystemLiteral +     *		   | 'PUBLIC' S PubidLiteral S SystemLiteral +     * [76] NDataDecl ::= S 'NDATA' S Name +     * </pre> +     * <p>NOTE: the '<!ENTITY' has already been read. +     */ +    private void parseEntityDecl () +    throws Exception +    { +	boolean peFlag = false; +	int flags = 0; + +	// Check for a parameter entity. 
+	expandPE = false; +	requireWhitespace (); +	if (tryRead ('%')) { +	    peFlag = true; +	    requireWhitespace (); +	} +	expandPE = true; + +	// Read the entity name, and prepend +	// '%' if necessary. +	String name = readNmtoken (true); +        //NE08 +	if (name.indexOf(':') >= 0) +           error ("Illegal character(':') in entity name ", name, null); +	if (peFlag) { +	    name = "%" + name; +	} + +	// Read the entity value. +	requireWhitespace (); +	char c = readCh (); +	unread (c); +	if (c == '"' || c == '\'') { +	    // Internal entity ... replacement text has expanded refs +	    // to characters and PEs, but not to general entities +	    String value = readLiteral (flags); +	    setInternalEntity (name, value); +	} else { +	    // Read the external IDs +	    String ids [] = readExternalIds (false, false); + +	    // Check for NDATA declaration. +	    boolean white = tryWhitespace (); +	    if (!peFlag && tryRead ("NDATA")) { +		if (!white) +		    error ("whitespace required before NDATA"); +		requireWhitespace (); +		String notationName = readNmtoken (true); +		if (!skippedPE) { +		    setExternalEntity (name, ENTITY_NDATA, ids, notationName); +		    handler.unparsedEntityDecl (name, ids, notationName); +		} +	    } else if (!skippedPE) { +		setExternalEntity (name, ENTITY_TEXT, ids, null); +		handler.getDeclHandler () +		    .externalEntityDecl (name, ids [0], +			    handler.resolveURIs () +	    				// FIXME: ASSUMES not skipped +					// "false" forces error on bad URI +				? handler.absolutize (ids [2], ids [1], false) +				: ids [1]); +	    } +	} + +	// Finish the declaration. +	skipWhitespace (); +	require ('>'); +    } + + +    /** +     * Parse a notation declaration. +     * <pre> +     * [82] NotationDecl ::= '<!NOTATION' S Name S +     *		(ExternalID | PublicID) S? '>' +     * [83] PublicID ::= 'PUBLIC' S PubidLiteral +     * </pre> +     * <P>NOTE: the '<!NOTATION' has already been read. 
+     */ +    private void parseNotationDecl () +    throws Exception +    { +	String nname, ids[]; + + +	requireWhitespace (); +	nname = readNmtoken (true); +        //NE08 +	if (nname.indexOf(':') >= 0) +           error ("Illegal character(':') in notation name ", nname, null); +	requireWhitespace (); + +	// Read the external identifiers. +	ids = readExternalIds (true, false); + +	// Register the notation. +	setNotation (nname, ids); + +	skipWhitespace (); +	require ('>'); +    } + + +    /** +     * Parse character data. +     * <pre> +     * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) +     * </pre> +     */ +    private void parseCharData () +    throws Exception +    { +	char	c; +	int	state = 0; +	boolean pureWhite = false; + +	// assert (dataBufferPos == 0); + +	// are we expecting pure whitespace?  it might be dirty... +	if ((currentElementContent == CONTENT_ELEMENTS) && !isDirtyCurrentElement) +	    pureWhite = true; + +	// always report right out of readBuffer +	// to minimize (pointless) buffer copies +	while (true) { +	    int lineAugment = 0; +	    int columnAugment = 0; +	    int i; + +loop: +	    for (i = readBufferPos; i < readBufferLength; i++) { +		switch (c = readBuffer [i]) { +		case '\n': +		    lineAugment++; +		    columnAugment = 0; +		    // pureWhite unmodified +		    break; +		case '\r':	// should not happen!! 
+		case '\t': +		case ' ': +		    // pureWhite unmodified +		    columnAugment++; +		    break; +		case '&': +		case '<': +		    columnAugment++; +		    // pureWhite unmodified +		    // CLEAN end of text sequence +		    state = 1; +		    break loop; +		case ']': +		    // that's not a whitespace char, and +		    // can not terminate pure whitespace either +		    pureWhite = false; +		    if ((i + 2) < readBufferLength) { +			if (readBuffer [i + 1] == ']' +				&& readBuffer [i + 2] == '>') { +			    // ERROR end of text sequence +			    state = 2; +			    break loop; +			} +		    } else { +			// FIXME missing two end-of-buffer cases +		    } +		    columnAugment++; +		    break; +		default: +			if ((c < 0x0020 || c > 0xFFFD) +			   || ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085)  +			       && xmlVersion == XML_11))  +				error ("illegal XML character U+" +					+ Integer.toHexString (c)); +		    // that's not a whitespace char +		    pureWhite = false; +		    columnAugment++; +		} +	    } + +	    // report text thus far +	    if (lineAugment > 0) { +		line += lineAugment; +		column = columnAugment; +	    } else { +		column += columnAugment; +	    } + +	    // report characters/whitspace +	    int		length = i - readBufferPos; + +	    if (length != 0) { +		if (pureWhite) +		    handler.ignorableWhitespace (readBuffer, +		    		readBufferPos, length); +		else +		    handler.charData (readBuffer, readBufferPos, length); +		readBufferPos = i; +	    } +	     +	    if (state != 0) +		break; + +	    // fill next buffer from this entity, or +	    // pop stack and continue with previous entity +	    unread (readCh ()); +	} +        if (!pureWhite) +           isDirtyCurrentElement = true; +	// finish, maybe with error +	if (state != 1)	// finish, no error +	    error ("character data may not contain ']]>'"); +    } + + +    ////////////////////////////////////////////////////////////////////// +    // High-level reading and scanning methods. 
+    ////////////////////////////////////////////////////////////////////// + +    /** +     * Require whitespace characters. +     */ +    private void requireWhitespace () +    throws SAXException, IOException +    { +	char c = readCh (); +	if (isWhitespace (c)) { +	    skipWhitespace (); +	} else { +	    error ("whitespace required", c, null); +	} +    } + + +    /** +     * Skip whitespace characters. +     * <pre> +     * [3] S ::= (#x20 | #x9 | #xd | #xa)+ +     * </pre> +     */ +    private void skipWhitespace () +    throws SAXException, IOException +    { +	// Start with a little cheat.  Most of +	// the time, the white space will fall +	// within the current read buffer; if +	// not, then fall through. +	if (USE_CHEATS) { +	    int lineAugment = 0; +	    int columnAugment = 0; + +loop: +	    for (int i = readBufferPos; i < readBufferLength; i++) { +		switch (readBuffer [i]) { +		case ' ': +		case '\t': +		case '\r': +		    columnAugment++; +		    break; +		case '\n': +		    lineAugment++; +		    columnAugment = 0; +		    break; +		case '%': +		    if (expandPE) +			break loop; +		    // else fall through... +		default: +		    readBufferPos = i; +		    if (lineAugment > 0) { +			line += lineAugment; +			column = columnAugment; +		    } else { +			column += columnAugment; +		    } +		    return; +		} +	    } +	} + +	// OK, do it the slow way. +	char c = readCh (); +	while (isWhitespace (c)) { +	    c = readCh (); +	} +	unread (c); +    } + + +    /** +     * Read a name or (when parsing an enumeration) name token. +     * <pre> +     * [5] Name ::= (Letter | '_' | ':') (NameChar)* +     * [7] Nmtoken ::= (NameChar)+ +     * </pre> +     */ +    private String readNmtoken (boolean isName) +    throws SAXException, IOException +    { +	char c; + +	if (USE_CHEATS) { +loop: +	    for (int i = readBufferPos; i < readBufferLength; i++) { +		c = readBuffer [i]; +		switch (c) { +		  case '%': +		    if (expandPE) +			break loop; +		    // else fall through... 
+ +		    // What may legitimately come AFTER a name/nmtoken? +		  case '<': case '>': case '&': +		  case ',': case '|': case '*': case '+': case '?': +		  case ')': +		  case '=': +		  case '\'': case '"': +		  case '[': +		  case ' ': case '\t': case '\r': case '\n': +		  case ';': +		  case '/': +		    int start = readBufferPos; +		    if (i == start) +			error ("name expected", readBuffer [i], null); +		    readBufferPos = i; +		    return intern (readBuffer, start, i - start); + +		  default: +// FIXME ... per IBM's OASIS test submission, these: +//   ?		U+06dd  +//   Combining	U+309B +		    //these switches are kind of ugly but at least we won't +		    //have to go over the whole lits for each char +		    if (isName && i == readBufferPos){ +			    char c2 = (char) (c & 0x00f0); +	                    switch (c & 0xff00){ +	                    	//starting with 01 +	                    	case 0x0100: +	                       	    switch (c2){ +	                    	        case 0x0030: +	                    	            if (c == 0x0132 || c == 0x0133 || c == 0x013f) +	                    	            	error ("Not a name start character, U+" +	              				       + Integer.toHexString (c)); +	                    	        break; +	                    	        case 0x0040: +	                	            if (c == 0x0140 || c == 0x0149) +	                	            	error ("Not a name start character, U+" +	          				       + Integer.toHexString (c)); +	                	        break; +	                    	        case 0x00c0: +	            	                    if (c == 0x01c4 || c == 0x01cc) +	            	            	        error ("Not a name start character, U+" +	      				               + Integer.toHexString (c)); +	            	                break; +	                    	        case 0x00f0: +	        	                    if (c == 0x01f1 || c == 0x01f3) +	        	            	        error ("Not a name start character, U+" +	  				               
+ Integer.toHexString (c)); +	        	                break; +	                    	        case 0x00b0: +	    	                            if (c == 0x01f1 || c == 0x01f3) +	    	            	                error ("Not a name start character, U+" +					               + Integer.toHexString (c)); +	    	                        break; +	        	                default: +	        	                    if (c == 0x017f) +	                	            	error ("Not a name start character, U+" +	          				        + Integer.toHexString (c));	 +	                    	    } +				     +	                    	break; +	                    	//starting with 11 +	                    	case 0x1100: +	                            switch (c2){ +	                                case 0x0000: +	                                    if (c == 0x1104 || c == 0x1108 || +	                                    	c == 0x110a || c == 0x110d) +	                                      	error ("Not a name start character, U+" +	                      		             + Integer.toHexString (c)); +	                                break; +	                                case 0x0030: +	                                    if (c == 0x113b || c == 0x113f) +	                                      	error ("Not a name start character, U+" +	                          	               + Integer.toHexString (c)); +	                                break; +	                                case 0x0040: +	                                    if (c == 0x1141 || c == 0x114d +	                                        || c == 0x114f ) +	                                      	error ("Not a name start character, U+" +	                          	               + Integer.toHexString (c)); +	                                break; +	                                case 0x0050: +	                                     if (c == 0x1151 || c == 0x1156) +	                                         error ("Not a name start character, U+" +	       
                   		        + Integer.toHexString (c)); +	                                break; +	                                case 0x0060: +		                             if (c == 0x1162 || c == 0x1164 +		                             	 || c == 0x1166 || c == 0x116b +						 || c == 0x116f) +		                                 error ("Not a name start character, U+" +		                          		 + Integer.toHexString (c)); +		                                break; +	                                case 0x00b0: +	                                     if (c == 0x11b6 || c == 0x11b9 +	                                         || c == 0x11bb || c == 0x116f) +	                                         error ("Not a name start character, U+" +	                          		        + Integer.toHexString (c)); +	                                break; +	                                default: +	                                    if (c == 0x1174 || c == 0x119f +	                                    	|| c == 0x11ac || c == 0x11c3 +						|| c == 0x11f1) +	                                        error ("Not a name start character, U+" +	                                                + Integer.toHexString (c)); +	                            } +	                        break; +	                        default: +	                           if (c == 0x0e46 || c == 0x1011  +	                               || c == 0x212f || c == 0x0587 +				       || c == 0x0230 ) +	                	       error ("Not a name start character, U+" +	          		              + Integer.toHexString (c)); +	                    } +		    } +		    // punt on exact tests from Appendix A; approximate +		    // them using the Unicode ID start/part rules +		    if (i == readBufferPos && isName) { +			if (!Character.isUnicodeIdentifierStart (c) +				&& c != ':' && c != '_') +			    error ("Not a name start character, U+" +				  + Integer.toHexString (c)); +		    } else if (!Character.isUnicodeIdentifierPart (c) 
+			    && c != '-' && c != ':' && c != '_' && c != '.' +			    && !isExtender (c)) +			error ("Not a name character, U+" +				+ Integer.toHexString (c)); +		} +	    } +	} + +	nameBufferPos = 0; + +	// Read the first character. +loop: +	while (true) { +	    c = readCh (); +	    switch (c) { +	    case '%': +	    case '<': case '>': case '&': +	    case ',': case '|': case '*': case '+': case '?': +	    case ')': +	    case '=': +	    case '\'': case '"': +	    case '[': +	    case ' ': case '\t': case '\n': case '\r': +	    case ';': +	    case '/': +		unread (c); +		if (nameBufferPos == 0) { +		    error ("name expected"); +		} +		// punt on exact tests from Appendix A, but approximate them +		if (isName +			&& !Character.isUnicodeIdentifierStart ( +				nameBuffer [0]) +			&& ":_".indexOf (nameBuffer [0]) == -1) +		    error ("Not a name start character, U+" +			      + Integer.toHexString (nameBuffer [0])); +		String s = intern (nameBuffer, 0, nameBufferPos); +		nameBufferPos = 0; +		return s; +	    default: +		// punt on exact tests from Appendix A, but approximate them + +		if ((nameBufferPos != 0 || !isName) +			&& !Character.isUnicodeIdentifierPart (c) +			&& ":-_.".indexOf (c) == -1 +			&& !isExtender (c)) +		    error ("Not a name character, U+" +			    + Integer.toHexString (c)); +		if (nameBufferPos >= nameBuffer.length) +		    nameBuffer = +			(char[]) extendArray (nameBuffer, +				    nameBuffer.length, nameBufferPos); +		nameBuffer [nameBufferPos++] = c; +	    } +	} +    } + +    private static boolean isExtender (char c) +    { +	// [88] Extender ::= ... +	return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387 +	       || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005 +	       || (c >= 0x3031 && c <= 0x3035) +	       || (c >= 0x309d && c <= 0x309e) +	       || (c >= 0x30fc && c <= 0x30fe); +    } + + +    /** +     * Read a literal.  With matching single or double quotes as +     * delimiters (and not embedded!) 
this is used to parse: +     * <pre> +     *	[9] EntityValue ::= ... ([^%&] | PEReference | Reference)* ... +     *	[10] AttValue ::= ... ([^<&] | Reference)* ... +     *	[11] SystemLiteral ::= ... (URLchar - "'")* ... +     *	[12] PubidLiteral ::= ... (PubidChar - "'")* ... +     * </pre> +     * as well as the quoted strings in XML and text declarations +     * (for version, encoding, and standalone) which have their +     * own constraints. +     */ +    private String readLiteral (int flags) +    throws SAXException, IOException +    { +	char	delim, c; +	int	startLine = line; +	boolean	saved = expandPE; +	boolean	savedReport = doReport; + +	// Find the first delimiter. +	delim = readCh (); +	if (delim != '"' && delim != '\'') { +	    error ("expected '\"' or \"'\"", delim, null); +	    return null; +	} +	inLiteral = true; +	if ((flags & LIT_DISABLE_PE) != 0) +	    expandPE = false; +	doReport = false; + +	// Each level of input source has its own buffer; remember +	// ours, so we won't read the ending delimiter from any +	// other input source, regardless of entity processing. +	char ourBuf [] = readBuffer; + +	// Read the literal. +	try { +	    c = readCh (); +	    boolean ampRead = false; +loop: +	    while (! (c == delim && readBuffer == ourBuf)) { +		switch (c) { +		    // attributes and public ids are normalized +		    // in almost the same ways +		case '\n': +		case '\r': +		    if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0) +			c = ' '; +		    break; +		case '\t': +		    if ((flags & LIT_ATTRIBUTE) != 0) +			c = ' '; +		    break; +		case '&': +		    c = readCh (); +		    // Char refs are expanded immediately, except for +		    // all the cases where it's deferred. 
+		    if (c == '#') { +			if ((flags & LIT_DISABLE_CREF) != 0) { +			    dataBufferAppend ('&'); +			    break; +			} +                        parseCharRef (false /* Do not do flushDataBuffer */); + +			// exotic WFness risk: this is an entity literal, +			// dataBuffer [dataBufferPos - 1] == '&', and +			// following chars are a _partial_ entity/char ref +                    +		    // It looks like an entity ref ... +		    } else { +			unread (c); +			// Expand it? +			if ((flags & LIT_ENTITY_REF) > 0) { +			    parseEntityRef (false); +			    if (String.valueOf (readBuffer).equals("&")) +			    	ampRead = true; +                        //Is it just data? +			} else if ((flags & LIT_DISABLE_EREF) != 0) { +			    dataBufferAppend ('&'); + +			// OK, it will be an entity ref -- expanded later. +			} else { +			    String name = readNmtoken (true); +			    require (';'); +			    dataBufferAppend ('&'); +			    dataBufferAppend (name); +			    dataBufferAppend (';'); +			} +		    } +		    c = readCh (); +		    continue loop; + +		case '<': +		    // and why?  Perhaps so "&foo;" expands the same +		    // inside and outside an attribute? +		    if ((flags & LIT_ATTRIBUTE) != 0) +			error ("attribute values may not contain '<'"); +		    break; + +		// We don't worry about case '%' and PE refs, readCh does. + +		default: +		    break; +		} +		dataBufferAppend (c); +		c = readCh (); +	    } +	} catch (EOFException e) { +	    error ("end of input while looking for delimiter (started on line " +		   + startLine + ')', null, new Character (delim).toString ()); +	} +	inLiteral = false; +	expandPE = saved; +	doReport = savedReport; + +	// Normalise whitespace if necessary. +	if ((flags & LIT_NORMALIZE) > 0) { +	    dataBufferNormalize (); +	} + +	// Return the value. +	return dataBufferToString (); +    } + + +    /** +     * Try reading external identifiers. +     * A system identifier is not required for notations. +     * @param inNotation Are we parsing a notation decl? 
+     * @param isSubset Parsing external subset decl (may be omitted)? +     * @return A three-member String array containing the identifiers, +     *	or nulls. Order: public, system, baseURI. +     */ +    private String[] readExternalIds (boolean inNotation, boolean isSubset) +    throws Exception +    { +	char	c; +	String	ids[] = new String [3]; +	int	flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; + +	if (tryRead ("PUBLIC")) { +	    requireWhitespace (); +	    ids [0] = readLiteral (LIT_NORMALIZE | LIT_PUBID | flags); +	    if (inNotation) { +		skipWhitespace (); +		c = readCh (); +		unread (c); +		if (c == '"' || c == '\'') { +		    ids [1] = readLiteral (flags); +		} +	    } else { +		requireWhitespace (); +		ids [1] = readLiteral (flags); +	    } + +	    for (int i = 0; i < ids [0].length (); i++) { +		c = ids [0].charAt (i); +		if (c >= 'a' && c <= 'z') +		    continue; +		if (c >= 'A' && c <= 'Z') +		    continue; +		if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf (c) != -1) +		    continue; +		error ("illegal PUBLIC id character U+" +			+ Integer.toHexString (c)); +	    } +	} else if (tryRead ("SYSTEM")) { +	    requireWhitespace (); +	    ids [1] = readLiteral (flags); +	} else if (!isSubset)  +		error ("missing SYSTEM or PUBLIC keyword"); + +	if (ids [1] != null) { +	    if (ids [1].indexOf ('#') != -1) +		handler.verror ("SYSTEM id has a URI fragment: " + ids [1]); +	    ids [2] = handler.getSystemId (); +	    if (ids [2] == null) +		handler.warn ("No base URI; hope URI is absolute: " +			+ ids [1]); +	} + +	return ids; +    } + + +    /** +     * Test if a character is whitespace. +     * <pre> +     * [3] S ::= (#x20 | #x9 | #xd | #xa)+ +     * </pre> +     * @param c The character to test. +     * @return true if the character is whitespace. 
+     */ +    private final boolean isWhitespace (char c) +    { +	if (c > 0x20) +	    return false; +	if (c == 0x20 || c == 0x0a || c == 0x09 || c == 0x0d) +	    return true; +	return false;	// illegal ... +    } + + +    ////////////////////////////////////////////////////////////////////// +    // Utility routines. +    ////////////////////////////////////////////////////////////////////// + + +    /** +     * Add a character to the data buffer. +     */ +    private void dataBufferAppend (char c) +    { +	// Expand buffer if necessary. +	if (dataBufferPos >= dataBuffer.length) +	    dataBuffer = +		(char[]) extendArray (dataBuffer, +			dataBuffer.length, dataBufferPos); +	dataBuffer [dataBufferPos++] = c; +    } + + +    /** +     * Add a string to the data buffer. +     */ +    private void dataBufferAppend (String s) +    { +	dataBufferAppend (s.toCharArray (), 0, s.length ()); +    } + + +    /** +     * Append (part of) a character array to the data buffer. +     */ +    private void dataBufferAppend (char ch[], int start, int length) +    { +	dataBuffer = (char[]) +		extendArray (dataBuffer, dataBuffer.length, +				    dataBufferPos + length); + +	System.arraycopy (ch, start, dataBuffer, dataBufferPos, length); +	dataBufferPos += length; +    } + + +    /** +     * Normalise space characters in the data buffer. +     */ +    private void dataBufferNormalize () +    { +	int i = 0; +	int j = 0; +	int end = dataBufferPos; + +	// Skip spaces at the start. +	while (j < end && dataBuffer [j] == ' ') { +	    j++; +	} + +	// Skip whitespace at the end. +	while (end > j && dataBuffer [end - 1] == ' ') { +	    end --; +	} + +	// Start copying to the left. +	while (j < end) { + +	    char c = dataBuffer [j++]; + +	    // Normalise all other spaces to +	    // a single space. 
+	    if (c == ' ') { +		while (j < end && dataBuffer [j++] == ' ') +		    continue; +		dataBuffer [i++] = ' '; +		dataBuffer [i++] = dataBuffer [j - 1]; +	    } else { +		dataBuffer [i++] = c; +	    } +	} + +	// The new length is <= the old one. +	dataBufferPos = i; +    } + + +    /** +     * Convert the data buffer to a string. +     */ +    private String dataBufferToString () +    { +	String s = new String (dataBuffer, 0, dataBufferPos); +	dataBufferPos = 0; +	return s; +    } + + +    /** +     * Flush the contents of the data buffer to the handler, as +     * appropriate, and reset the buffer for new input. +     */ +    private void dataBufferFlush () +    throws SAXException +    { +	if (currentElementContent == CONTENT_ELEMENTS +		&& dataBufferPos > 0 +		&& !inCDATA +		) { +	    // We can't just trust the buffer to be whitespace, there +	    // are (error) cases when it isn't +	    for (int i = 0; i < dataBufferPos; i++) { +		if (!isWhitespace (dataBuffer [i])) { +		    handler.charData (dataBuffer, 0, dataBufferPos); +		    dataBufferPos = 0; +		} +	    } +	    if (dataBufferPos > 0) { +		handler.ignorableWhitespace (dataBuffer, 0, dataBufferPos); +		dataBufferPos = 0; +	    } +	} else if (dataBufferPos > 0) { +	    handler.charData (dataBuffer, 0, dataBufferPos); +	    dataBufferPos = 0; +	} +    } + + +    /** +     * Require a string to appear, or throw an exception. +     * <p><em>Precondition:</em> Entity expansion is not required. +     * <p><em>Precondition:</em> data buffer has no characters that +     * will get sent to the application. 
+     */ +    private void require (String delim) +    throws SAXException, IOException +    { +	int	length = delim.length (); +	char	ch []; +		 +	if (length < dataBuffer.length) { +	    ch = dataBuffer; +	    delim.getChars (0, length, ch, 0); +	} else +	    ch = delim.toCharArray (); + +	if (USE_CHEATS +		&& length <= (readBufferLength - readBufferPos)) { +	    int offset = readBufferPos; + +	    for (int i = 0; i < length; i++, offset++) +		if (ch [i] != readBuffer [offset]) +		    error ("required string", null, delim); +	    readBufferPos = offset; +	     +	} else { +	    for (int i = 0; i < length; i++) +		require (ch [i]); +	} +    } + + +    /** +     * Require a character to appear, or throw an exception. +     */ +    private void require (char delim) +    throws SAXException, IOException +    { +	char c = readCh (); + +	if (c != delim) { +	    error ("required character", c, new Character (delim).toString ()); +	} +    } + + +    /** +     * Create an interned string from a character array. +     * Ælfred uses this method to create an interned version +     * of all names and name tokens, so that it can test equality +     * with <code>==</code> instead of <code>String.equals ()</code>. +     * +     * <p>This is much more efficient than constructing a non-interned +     * string first, and then interning it. +     * +     * @param ch an array of characters for building the string. +     * @param start the starting position in the array. +     * @param length the number of characters to place in the string. +     * @return an interned string. +     * @see #intern (String) +     * @see java.lang.String#intern +     */ +    public String intern (char ch[], int start, int length) +    { +	int	index = 0; +	int	hash = 0; +	Object	bucket []; + +	// Generate a hash code.  This is a widely used string hash, +	// often attributed to Brian Kernighan. 
+	for (int i = start; i < start + length; i++) +	    hash = 31 * hash + ch [i]; +	hash = (hash & 0x7fffffff) % SYMBOL_TABLE_LENGTH; + +	// Get the bucket -- consists of {array,String} pairs +	if ((bucket = symbolTable [hash]) == null) { +	    // first string in this bucket +	    bucket = new Object [8]; + +	// Search for a matching tuple, and +	// return the string if we find one. +	} else { +	    while (index < bucket.length) { +		char chFound [] = (char []) bucket [index]; + +		// Stop when we hit an empty entry. +		if (chFound == null) +		    break; + +		// If they're the same length, check for a match. +		if (chFound.length == length) { +		    for (int i = 0; i < chFound.length; i++) { +			// continue search on failure +			if (ch [start + i] != chFound [i]) { +			    break; +			} else if (i == length - 1) { +			    // That's it, we have a match! +			    return (String) bucket [index + 1]; +			} +		    } +		} +		index += 2; +	    } +	    // Not found -- we'll have to add it. + +	    // Do we have to grow the bucket? +	    bucket = (Object []) extendArray (bucket, bucket.length, index); +	} +	symbolTable [hash] = bucket; + +	// OK, add it to the end of the bucket -- "local" interning. +	// Intern "globally" to let applications share interning benefits. +	// That is, "!=" and "==" work on our strings, not just equals(). +	String s = new String (ch, start, length).intern (); +	bucket [index] = s.toCharArray (); +	bucket [index + 1] = s; +	return s; +    } + +    /** +     * Ensure the capacity of an array, allocating a new one if +     * necessary.  Usually extends only for name hash collisions.  
+     */ +    private Object extendArray (Object array, int currentSize, int requiredSize) +    { +	if (requiredSize < currentSize) { +	    return array; +	} else { +	    Object newArray = null; +	    int newSize = currentSize * 2; + +	    if (newSize <= requiredSize) +		newSize = requiredSize + 1; + +	    if (array instanceof char[]) +		newArray = new char [newSize]; +	    else if (array instanceof Object[]) +		newArray = new Object [newSize]; +	    else +		throw new RuntimeException (); + +	    System.arraycopy (array, 0, newArray, 0, currentSize); +	    return newArray; +	} +    } + + +    ////////////////////////////////////////////////////////////////////// +    // XML query routines. +    ////////////////////////////////////////////////////////////////////// + + +    boolean isStandalone () { return docIsStandalone; } + + +    // +    // Elements +    // + +    private int getContentType (Object element [], int defaultType) +    { +	int retval; + +	if (element == null) +	    return defaultType; +	retval = ((Integer) element [0]).intValue (); +	if (retval == CONTENT_UNDECLARED) +	    retval = defaultType; +	return retval; +    } + + +    /** +     * Look up the content type of an element. +     * @param name The element type name. +     * @return An integer constant representing the content type. +     * @see #CONTENT_UNDECLARED +     * @see #CONTENT_ANY +     * @see #CONTENT_EMPTY +     * @see #CONTENT_MIXED +     * @see #CONTENT_ELEMENTS +     */ +    public int getElementContentType (String name) +    { +	Object element [] = (Object []) elementInfo.get (name); +	return getContentType (element, CONTENT_UNDECLARED); +    } + + +    /** +     * Register an element. 
+     * Array format: +     *  [0] element type name +     *  [1] content model (mixed, elements only) +     *  [2] attribute hash table +     */ +    private void setElement ( +	String		name, +	int		contentType, +	String		contentModel, +	Hashtable	attributes +    ) throws SAXException  +    { +	if (skippedPE) +	    return; + +	Object element [] = (Object []) elementInfo.get (name); + +	// first <!ELEMENT ...> or <!ATTLIST ...> for this type? +	if (element == null) { +	    element = new Object [3]; +	    element [0] = new Integer (contentType); +	    element [1] = contentModel; +	    element [2] = attributes; +	    elementInfo.put (name, element); +	    return; +	} + +	// <!ELEMENT ...> declaration? +	if (contentType != CONTENT_UNDECLARED) { +	    // ... following an associated <!ATTLIST ...> +	    if (((Integer) element [0]).intValue () == CONTENT_UNDECLARED) { +		element [0] = new Integer (contentType); +		element [1] = contentModel; +	    } else +		// VC: Unique Element Type Declaration +		handler.verror ("multiple declarations for element type: " +			+ name); +	} + +	// first <!ATTLIST ...>, before <!ELEMENT ...> ? +	else if (attributes != null) +	    element [2] = attributes; +    } + + +    /** +     * Look up the attribute hash table for an element. +     * The hash table is the second item in the element array. +     */ +    private Hashtable getElementAttributes (String name) +    { +	Object element[] = (Object[]) elementInfo.get (name); +	if (element == null) +	    return null; +	else +	    return (Hashtable) element [2]; +    } + + + +    // +    // Attributes +    // + +    /** +     * Get the declared attributes for an element type. +     * @param elname The name of the element type. +     * @return An Enumeration of all the attributes declared for +     *	 a specific element type.  The results will be valid only +     *	 after the DTD (if any) has been parsed. 
+     * @see #getAttributeType +     * @see #getAttributeEnumeration +     * @see #getAttributeDefaultValueType +     * @see #getAttributeDefaultValue +     * @see #getAttributeExpandedValue +     */ +    private Enumeration declaredAttributes (Object element []) +    { +	Hashtable attlist; + +	if (element == null) +	    return null; +	if ((attlist = (Hashtable) element [2]) == null) +	    return null; +	return attlist.keys (); +    } + +    /** +     * Get the declared attributes for an element type. +     * @param elname The name of the element type. +     * @return An Enumeration of all the attributes declared for +     *	 a specific element type.  The results will be valid only +     *	 after the DTD (if any) has been parsed. +     * @see #getAttributeType +     * @see #getAttributeEnumeration +     * @see #getAttributeDefaultValueType +     * @see #getAttributeDefaultValue +     * @see #getAttributeExpandedValue +     */ +    public Enumeration declaredAttributes (String elname) +    { +	return declaredAttributes ((Object []) elementInfo.get (elname)); +    } + + +    /** +     * Retrieve the declared type of an attribute. +     * @param name The name of the associated element. +     * @param aname The name of the attribute. +     * @return An interend string denoting the type, or null +     *	indicating an undeclared attribute. +     */ +    public String getAttributeType (String name, String aname) +    { +	Object attribute[] = getAttribute (name, aname); +	if (attribute == null) { +	    return null; +	} else { +	    return (String) attribute [0]; +	} +    } + + +    /** +     * Retrieve the allowed values for an enumerated attribute type. +     * @param name The name of the associated element. +     * @param aname The name of the attribute. +     * @return A string containing the token list. 
+     */ +    public String getAttributeEnumeration (String name, String aname) +    { +	Object attribute[] = getAttribute (name, aname); +	if (attribute == null) { +	    return null; +	} else { +	    // assert:  attribute [0] is "ENUMERATION" or "NOTATION" +	    return (String) attribute [3]; +	} +    } + + +    /** +     * Retrieve the default value of a declared attribute. +     * @param name The name of the associated element. +     * @param aname The name of the attribute. +     * @return The default value, or null if the attribute was +     *	 #IMPLIED or simply undeclared and unspecified. +     * @see #getAttributeExpandedValue +     */ +    public String getAttributeDefaultValue (String name, String aname) +    { +	Object attribute[] = getAttribute (name, aname); +	if (attribute == null) { +	    return null; +	} else { +	    return (String) attribute [1]; +	} +    } + +    /* + +// FIXME:  Leaving this in, until W3C finally resolves the confusion +// between parts of the XML 2nd REC about when entity declararations +// are guaranteed to be known.  Current code matches what section 5.1 +// (conformance) describes, but some readings of the self-contradicting +// text in 4.1 (the "Entity Declared" WFC and VC) seem to expect that +// attribute expansion/normalization must be deferred in some cases +// (just TRY to identify them!). + +     * Retrieve the expanded value of a declared attribute. +     * <p>General entities (and char refs) will be expanded (once). +     * @param name The name of the associated element. +     * @param aname The name of the attribute. 
+     * @return The expanded default value, or null if the attribute was +     *	 #IMPLIED or simply undeclared +     * @see #getAttributeDefaultValue +    public String getAttributeExpandedValue (String name, String aname) +    throws Exception +    { +	Object attribute[] = getAttribute (name, aname); + +	if (attribute == null) { +	    return null; +	} else if (attribute [4] == null && attribute [1] != null) { +	    // we MUST use the same buf for both quotes else the literal +	    // can't be properly terminated +	    char buf [] = new char [1]; +	    int	flags = LIT_ENTITY_REF | LIT_ATTRIBUTE; +	    String type = getAttributeType (name, aname); + +	    if (type != "CDATA" && type != null) +		flags |= LIT_NORMALIZE; +	    buf [0] = '"'; +	    pushCharArray (null, buf, 0, 1); +	    pushString (null, (String) attribute [1]); +	    pushCharArray (null, buf, 0, 1); +	    attribute [4] = readLiteral (flags); +	} +	return (String) attribute [4]; +    } +     */ + +    /** +     * Retrieve the default value mode of a declared attribute. +     * @see #ATTRIBUTE_DEFAULT_SPECIFIED +     * @see #ATTRIBUTE_DEFAULT_IMPLIED +     * @see #ATTRIBUTE_DEFAULT_REQUIRED +     * @see #ATTRIBUTE_DEFAULT_FIXED +     */ +    public int getAttributeDefaultValueType (String name, String aname) +    { +	Object attribute[] = getAttribute (name, aname); +	if (attribute == null) { +	    return ATTRIBUTE_DEFAULT_UNDECLARED; +	} else { +	    return ((Integer) attribute [2]).intValue (); +	} +    } + + +    /** +     * Register an attribute declaration for later retrieval. +     * Format: +     * - String type +     * - String default value +     * - int value type +     * - enumeration +     * - processed default value +     */ +    private void setAttribute (String elName, String name, String type, +			String enumeration, +			String value, int valueType) +    throws Exception +    { +	Hashtable attlist; + +	if (skippedPE) +	    return; + +	// Create a new hashtable if necessary. 
+	attlist = getElementAttributes (elName); +	if (attlist == null) +	    attlist = new Hashtable (); + +	// ignore multiple attribute declarations! +	if (attlist.get (name) != null) { +	    // warn ... +	    return; +	} else { +	    Object attribute [] = new Object [5]; +	    attribute [0] = type; +	    attribute [1] = value; +	    attribute [2] = new Integer (valueType); +	    attribute [3] = enumeration; +	    attribute [4] = null; +	    attlist.put (name, attribute); + +	    // save; but don't overwrite any existing <!ELEMENT ...> +	    setElement (elName, CONTENT_UNDECLARED, null, attlist); +	} +    } + + +    /** +     * Retrieve the array representing an attribute declaration. +     */ +    private Object[] getAttribute (String elName, String name) +    { +	Hashtable attlist; + +	attlist = getElementAttributes (elName); +	if (attlist == null) +	    return null; +	return (Object[]) attlist.get (name); +    } + + +    // +    // Entities +    // + +    /** +     * Find the type of an entity. +     * @returns An integer constant representing the entity type. +     * @see #ENTITY_UNDECLARED +     * @see #ENTITY_INTERNAL +     * @see #ENTITY_NDATA +     * @see #ENTITY_TEXT +     */ +    public int getEntityType (String ename) +    { +	Object entity[] = (Object[]) entityInfo.get (ename); +	if (entity == null) { +	    return ENTITY_UNDECLARED; +	} else { +	    return ((Integer) entity [0]).intValue (); +	} +    } + + +    /** +     * Return an external entity's identifier array. +     * @param ename The name of the external entity. +     * @return Three element array containing (in order) the entity's +     *	public identifier, system identifier, and base URI.  Null if +     *	 the entity was not declared as an external entity. 
+     * @see #getEntityType +     */ +    public String [] getEntityIds (String ename) +    { +	Object entity[] = (Object[]) entityInfo.get (ename); +	if (entity == null) { +	    return null; +	} else { +	    return (String []) entity [1]; +	} +    } + + +    /** +     * Return an internal entity's replacement text. +     * @param ename The name of the internal entity. +     * @return The entity's replacement text, or null if +     *	 the entity was not declared as an internal entity. +     * @see #getEntityType +     */ +    public String getEntityValue (String ename) +    { +	Object entity[] = (Object[]) entityInfo.get (ename); +	if (entity == null) { +	    return null; +	} else { +	    return (String) entity [3]; +	} +    } + + +    /** +     * Register an entity declaration for later retrieval. +     */ +    private void setInternalEntity (String eName, String value) +    throws SAXException +    { +	if (skippedPE) +	    return; + +	if (entityInfo.get (eName) == null) { +	    Object entity[] = new Object [5]; +	    entity [0] = new Integer (ENTITY_INTERNAL); +// FIXME: shrink!!  [2] useless +	    entity [3] = value; +	    entityInfo.put (eName, entity); +	} +  if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { +    if ("lt" == eName || "gt" == eName || "quot" == eName +        || "apos" == eName || "amp" == eName) +	    return; +  } else { +    if ("lt".equals(eName) || "gt".equals(eName) || "quot".equals(eName) +        || "apos".equals(eName) || "amp".equals(eName)) +	    return; +  } +	handler.getDeclHandler () +	    .internalEntityDecl (eName, value); +    } + + +    /** +     * Register an external entity declaration for later retrieval. +     */ +    private void setExternalEntity (String eName, int eClass, +		     String ids [], String nName) +    { +	if (entityInfo.get (eName) == null) { +	    Object entity[] = new Object [5]; +	    entity [0] = new Integer (eClass); +	    entity [1] = ids; +// FIXME: shrink!!  
[2] no longer used, [4] irrelevant given [0] +	    entity [4] = nName; +	    entityInfo.put (eName, entity); +	} +    } + + +    // +    // Notations. +    // + +    /** +     * Report a notation declaration, checking for duplicates. +     */ +    private void setNotation (String nname, String ids []) +    throws SAXException +    { +	if (skippedPE) +	    return; + +	handler.notationDecl (nname, ids); +	if (notationInfo.get (nname) == null) +	    notationInfo.put (nname, nname); +	else +	    // VC: Unique Notation Name +	    handler.verror ("Duplicate notation name decl: " + nname); +    } + + +    // +    // Location. +    // + + +    /** +     * Return the current line number. +     */ +    public int getLineNumber () +    { +	return line; +    } + + +    /** +     * Return the current column number. +     */ +    public int getColumnNumber () +    { +	return column; +    } + + +    ////////////////////////////////////////////////////////////////////// +    // High-level I/O. +    ////////////////////////////////////////////////////////////////////// + + +    /** +     * Read a single character from the readBuffer. +     * <p>The readDataChunk () method maintains the buffer. +     * <p>If we hit the end of an entity, try to pop the stack and +     * keep going. +     * <p> (This approach doesn't really enforce XML's rules about +     * entity boundaries, but this is not currently a validating +     * parser). +     * <p>This routine also attempts to keep track of the current +     * position in external entities, but it's not entirely accurate. +     * @return The next available input character. +     * @see #unread (char) +     * @see #readDataChunk +     * @see #readBuffer +     * @see #line +     * @return The next character from the current input source. 
+     */ +    private char readCh () +    throws SAXException, IOException +    { +	// As long as there's nothing in the +	// read buffer, try reading more data +	// (for an external entity) or popping +	// the entity stack (for either). +	while (readBufferPos >= readBufferLength) { +	    switch (sourceType) { +	    case INPUT_READER: +	    case INPUT_STREAM: +		readDataChunk (); +		while (readBufferLength < 1) { +		    popInput (); +		    if (readBufferLength < 1) { +			readDataChunk (); +		    } +		} +		break; + +	    default: + +		popInput (); +		break; +	    } +	} + +	char c = readBuffer [readBufferPos++]; +        +	if (c == '\n') { +	    line++; +	    column = 0; +	} else { +	    if (c == '<') { +		/* the most common return to parseContent () ... NOP */ +	    } else if (((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD) +	    		|| ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085)  +	    		   && xmlVersion == XML_11))  +		error ("illegal XML character U+" +			+ Integer.toHexString (c)); + +	    // If we're in the DTD and in a context where PEs get expanded, +	    // do so ... 1/14/2000 errata identify those contexts.  There +	    // are also spots in the internal subset where PE refs are fatal +	    // errors, hence yet another flag. +	    else if (c == '%' && expandPE) { +		if (peIsError) +		    error ("PE reference within decl in internal subset."); +		parsePEReference (); +		return readCh (); +	    } +	    column++; +	} + +	return c; +    } + + +    /** +     * Push a single character back onto the current input stream. +     * <p>This method usually pushes the character back onto +     * the readBuffer. +     * <p>I don't think that this would ever be called with  +     * readBufferPos = 0, because the methods always reads a character +     * before unreading it, but just in case, I've added a boundary +     * condition. +     * @param c The character to push back. 
+     * @see #readCh +     * @see #unread (char[]) +     * @see #readBuffer +     */ +    private void unread (char c) +    throws SAXException +    { +	// Normal condition. +	if (c == '\n') { +	    line--; +	    column = -1; +	} +	if (readBufferPos > 0) { +	    readBuffer [--readBufferPos] = c; +	} else { +	    pushString (null, new Character (c).toString ()); +	} +    } + + +    /** +     * Push a char array back onto the current input stream. +     * <p>NOTE: you must <em>never</em> push back characters that you +     * haven't actually read: use pushString () instead. +     * @see #readCh +     * @see #unread (char) +     * @see #readBuffer +     * @see #pushString +     */ +    private void unread (char ch[], int length) +    throws SAXException +    { +	for (int i = 0; i < length; i++) { +	    if (ch [i] == '\n') { +		line--; +		column = -1; +	    } +	} +	if (length < readBufferPos) { +	    readBufferPos -= length; +	} else { +	    pushCharArray (null, ch, 0, length); +	} +    } + + +    /** +     * Push, or skip, a new external input source. +     * The source will be some kind of parsed entity, such as a PE +     * (including the external DTD subset) or content for the body. +     * +     * @param url The java.net.URL object for the entity. +     * @see SAXDriver#resolveEntity +     * @see #pushString +     * @see #sourceType +     * @see #pushInput +     * @see #detectEncoding +     * @see #sourceType +     * @see #readBuffer +     */ +    private void pushURL ( +        boolean		isPE, +	String		ename, +	String		ids [],		// public, system, baseURI +	Reader		reader, +	InputStream	stream, +	String		encoding, +	boolean		doResolve +    ) throws SAXException, IOException +    { +	boolean		ignoreEncoding; +	String		systemId; +	InputSource	source; + +	if (!isPE) +	    dataBufferFlush (); + +	scratch.setPublicId (ids [0]); +	scratch.setSystemId (ids [1]); + +	// See if we should skip or substitute the entity. 
+	// If we're not skipping, resolving reports startEntity() +	// and updates the (handler's) stack of URIs. +	if (doResolve) { +	    // assert (stream == null && reader == null && encoding == null) +	    source = handler.resolveEntity (isPE, ename, scratch, ids [2]); +	    if (source == null) { +		handler.warn ("skipping entity: " + ename); +		handler.skippedEntity (ename); +		if (isPE) +		    skippedPE = true; +		return; +	    } + +	    // we might be using alternate IDs/encoding +	    systemId = source.getSystemId (); +	    // The following warning and setting systemId was deleted bcause +	    // the application has the option of not setting systemId +	    // provided that it has set the characte/byte stream. +	    /* +	    if (systemId == null) { +		handler.warn ("missing system ID, using " + ids [1]); +		systemId = ids [1]; +	    } +	    */ +	} else { +	    // "[document]", or "[dtd]" via getExternalSubset() +	    scratch.setCharacterStream (reader); +	    scratch.setByteStream (stream); +	    scratch.setEncoding (encoding); +	    source = scratch; +	    systemId = ids [1]; +      if (handler.getFeature (SAXDriver.FEATURE + "string-interning")) { +        handler.startExternalEntity (ename, systemId, +                                     "[document]" == ename); +      } else { +        handler.startExternalEntity (ename, systemId, +                                     "[document]".equals(ename)); +      } +	} + +	// we may have been given I/O streams directly +	if (source.getCharacterStream () != null) { +	    if (source.getByteStream () != null) +		error ("InputSource has two streams!"); +	    reader = source.getCharacterStream (); +	} else if (source.getByteStream () != null) { +	    encoding = source.getEncoding (); +	    if (encoding == null) +		stream = source.getByteStream (); +	    else try { +		reader = new InputStreamReader ( +		    source.getByteStream (), +		    encoding); +	    } catch (IOException e) { +		stream = source.getByteStream (); +	    } +	
} else if (systemId == null) +	    error ("InputSource has no URI!"); +	scratch.setCharacterStream (null); +	scratch.setByteStream (null); +	scratch.setEncoding (null); + +	// Push the existing status. +	pushInput (ename); + +	// Create a new read buffer. +	// (Note the four-character margin) +	readBuffer = new char [READ_BUFFER_MAX + 4]; +	readBufferPos = 0; +	readBufferLength = 0; +	readBufferOverflow = -1; +	is = null; +	line = 1; +	column = 0; +	currentByteCount = 0; + +	// If there's an explicit character stream, just +	// ignore encoding declarations. +	if (reader != null) { +	    sourceType = INPUT_READER; +	    this.reader = reader; +	    tryEncodingDecl (true); +	    return; +	} +	 +	// Else we handle the conversion, and need to ensure +	// it's done right. +	sourceType = INPUT_STREAM; +	if (stream != null) { +	    is = stream; +	} else { +	    // We have to open our own stream to the URL. +	    URL url = new URL (systemId); + +	    externalEntity = url.openConnection (); +	    externalEntity.connect (); +	    is = externalEntity.getInputStream (); +	} + +	// If we get to here, there must be +	// an InputStream available. +	if (!is.markSupported ()) { +	    is = new BufferedInputStream (is); +	} + +	// Get any external encoding label. +	if (encoding == null && externalEntity != null) { +	    // External labels can be untrustworthy; filesystems in +	    // particular often have the wrong default for content +	    // that wasn't locally originated.  Those we autodetect. +	    if (!"file".equals (externalEntity.getURL ().getProtocol ())) { +		int temp; + +		// application/xml;charset=something;otherAttr=... +		// ... 
with many variants on 'something' +		encoding = externalEntity.getContentType (); + +		// MHK code (fix for Saxon 5.5.1/007): +		// protect against encoding==null +		if (encoding==null) { +		    temp = -1; +		} else { +		    temp = encoding.indexOf ("charset"); +		} + +		// RFC 2376 sez MIME text defaults to ASCII, but since the +		// JDK will create a MIME type out of thin air, we always +		// autodetect when there's no explicit charset attribute. +		if (temp < 0) +		    encoding = null;	// autodetect +		else { +		    // only this one attribute +		    if ((temp = encoding.indexOf (';')) > 0) +			encoding = encoding.substring (0, temp); + +		    if ((temp = encoding.indexOf ('=', temp + 7)) > 0) { +			encoding = encoding.substring (temp + 1); + +			// attributes can have comment fields (RFC 822) +			if ((temp = encoding.indexOf ('(')) > 0) +			    encoding = encoding.substring (0, temp); +			// ... and values may be quoted +			if ((temp = encoding.indexOf ('"')) > 0) +			    encoding = encoding.substring (temp + 1, +				    encoding.indexOf ('"', temp + 2)); +			encoding.trim (); +		    } else { +			handler.warn ("ignoring illegal MIME attribute: " +				+ encoding); +			encoding = null; +		    } +		} +	    } +	} + +	// if we got an external encoding label, use it ... +	if (encoding != null) { +	    this.encoding = ENCODING_EXTERNAL; +	    setupDecoding (encoding); +	    ignoreEncoding = true; +	 +	// ... else autodetect from first bytes. +	} else { +	    detectEncoding (); +	    ignoreEncoding = false; +	} + +	// Read any XML or text declaration. +	// If we autodetected, it may tell us the "real" encoding. 
+	try { +	    tryEncodingDecl (ignoreEncoding); +	} catch (UnsupportedEncodingException x) { +	    encoding = x.getMessage (); + +	    // if we don't handle the declared encoding, +	    // try letting a JVM InputStreamReader do it +	    try { +		if (sourceType != INPUT_STREAM) +		    throw x; + +		is.reset (); +		readBufferPos = 0; +		readBufferLength = 0; +		readBufferOverflow = -1; +		line = 1; +		currentByteCount = column = 0; + +		sourceType = INPUT_READER; +		this.reader = new InputStreamReader (is, encoding); +		is = null; + +		tryEncodingDecl (true); + +	    } catch (IOException e) { +		error ("unsupported text encoding", +		       encoding, +		       null); +	    } +	} +    } + + +    /** +     * Check for an encoding declaration.  This is the second part of the +     * XML encoding autodetection algorithm, relying on detectEncoding to +     * get to the point that this part can read any encoding declaration +     * in the document (using only US-ASCII characters). +     * +     * <p> Because this part starts to fill parser buffers with this data, +     * it's tricky to setup a reader so that Java's built-in decoders can be +     * used for the character encodings that aren't built in to this parser +     * (such as EUC-JP, KOI8-R, Big5, etc). +     * +     * @return any encoding in the declaration, uppercased; or null +     * @see detectEncoding +     */ +    private String tryEncodingDecl (boolean ignoreEncoding) +    throws SAXException, IOException +    { +	// Read the XML/text declaration. +	if (tryRead ("<?xml")) { +	    if (tryWhitespace ()) { +		if (inputStack.size () > 0) { +		    return parseTextDecl (ignoreEncoding); +		} else { +		    return parseXMLDecl (ignoreEncoding); +		} +	    } else { +		// <?xml-stylesheet ...?> or similar +		unread ('l'); +		unread ('m'); +		unread ('x'); +		unread ('?'); +		unread ('<'); +	    } +	} +	return null; +    } + + +    /** +     * Attempt to detect the encoding of an entity. 
+     * <p>The trick here (as suggested in the XML standard) is that +     * any entity not in UTF-8, or in UCS-2 with a byte-order mark,  +     * <b>must</b> begin with an XML declaration or an encoding +     * declaration; we simply have to look for "<?xml" in various +     * encodings. +     * <p>This method has no way to distinguish among 8-bit encodings. +     * Instead, it sets up for UTF-8, then (possibly) revises its assumption +     * later in setupDecoding ().  Any ASCII-derived 8-bit encoding +     * should work, but most will be rejected later by setupDecoding (). +     * @see #tryEncoding (byte[], byte, byte, byte, byte) +     * @see #tryEncoding (byte[], byte, byte) +     * @see #setupDecoding +     */ +    private void detectEncoding () +    throws SAXException, IOException +    { +	byte signature[] = new byte [4]; + +	// Read the first four bytes for +	// autodetection. +	is.mark (4); +	is.read (signature); +	is.reset (); + +	// +	// FIRST:  four byte encodings (who uses these?) +	// +	if (tryEncoding (signature, (byte) 0x00, (byte) 0x00, +			  (byte) 0x00, (byte) 0x3c)) { +	    // UCS-4 must begin with "<?xml" +	    // 0x00 0x00 0x00 0x3c: UCS-4, big-endian (1234) +	    // "UTF-32BE" +	    encoding = ENCODING_UCS_4_1234; + +	} else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x00, +				 (byte) 0x00, (byte) 0x00)) { +	    // 0x3c 0x00 0x00 0x00: UCS-4, little-endian (4321) +	    // "UTF-32LE" +	    encoding = ENCODING_UCS_4_4321; + +	} else if (tryEncoding (signature, (byte) 0x00, (byte) 0x00, +				 (byte) 0x3c, (byte) 0x00)) { +	    // 0x00 0x00 0x3c 0x00: UCS-4, unusual (2143) +	    encoding = ENCODING_UCS_4_2143; + +	} else if (tryEncoding (signature, (byte) 0x00, (byte) 0x3c, +				 (byte) 0x00, (byte) 0x00)) { +	    // 0x00 0x3c 0x00 0x00: UCS-4, unusual (3421) +	    encoding = ENCODING_UCS_4_3412; + +	    // 00 00 fe ff UCS_4_1234 (with BOM) +	    // ff fe 00 00 UCS_4_4321 (with BOM) +	} + +	// +	// SECOND:  two byte encodings +	// note ... 
with 1/14/2000 errata the XML spec identifies some +	// more "broken UTF-16" autodetection cases, with no XML decl, +	// which we don't handle here (that's legal too). +	// +	else if (tryEncoding (signature, (byte) 0xfe, (byte) 0xff)) { +	    // UCS-2 with a byte-order marker. (UTF-16) +	    // 0xfe 0xff: UCS-2, big-endian (12) +	    encoding = ENCODING_UCS_2_12; +	    is.read (); is.read (); + +	} else if (tryEncoding (signature, (byte) 0xff, (byte) 0xfe)) { +	    // UCS-2 with a byte-order marker. (UTF-16) +	    // 0xff 0xfe: UCS-2, little-endian (21) +	    encoding = ENCODING_UCS_2_21; +	    is.read (); is.read (); + +	} else if (tryEncoding (signature, (byte) 0x00, (byte) 0x3c, +				 (byte) 0x00, (byte) 0x3f)) { +	    // UTF-16BE (otherwise, malformed UTF-16) +	    // 0x00 0x3c 0x00 0x3f: UCS-2, big-endian, no byte-order mark +	    encoding = ENCODING_UCS_2_12; +	    error ("no byte-order mark for UCS-2 entity"); + +	} else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x00, +				 (byte) 0x3f, (byte) 0x00)) { +	    // UTF-16LE (otherwise, malformed UTF-16) +	    // 0x3c 0x00 0x3f 0x00: UCS-2, little-endian, no byte-order mark +	    encoding = ENCODING_UCS_2_21; +	    error ("no byte-order mark for UCS-2 entity"); +	} + +	// +	// THIRD:  ASCII-derived encodings, fixed and variable lengths +	// +	else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x3f, +			       (byte) 0x78, (byte) 0x6d)) { +	    // ASCII derived +	    // 0x3c 0x3f 0x78 0x6d: UTF-8 or other 8-bit markup (read ENCODING) +	    encoding = ENCODING_UTF_8; +	    prefetchASCIIEncodingDecl (); + +	} else if (signature [0] == (byte) 0xef +		&& signature [1] == (byte) 0xbb +		&& signature [2] == (byte) 0xbf) { +	    // 0xef 0xbb 0xbf: UTF-8 BOM (not part of document text) +	    // this un-needed notion slipped into XML 2nd ed through a +	    // "non-normative" erratum; now required by MSFT and UDDI, +	    // and E22 made it normative. 
+	    encoding = ENCODING_UTF_8; +	    is.read (); is.read (); is.read (); + +	} else { +	    // 4c 6f a7 94 ... we don't understand EBCDIC flavors +	    // ... but we COULD at least kick in some fixed code page + +	    // (default) UTF-8 without encoding/XML declaration +	    encoding = ENCODING_UTF_8; +	} +    } + + +    /** +     * Check for a four-byte signature. +     * <p>Utility routine for detectEncoding (). +     * <p>Always looks for some part of "<?XML" in a specific encoding. +     * @param sig The first four bytes read. +     * @param b1 The first byte of the signature +     * @param b2 The second byte of the signature +     * @param b3 The third byte of the signature +     * @param b4 The fourth byte of the signature +     * @see #detectEncoding +     */ +    private static boolean tryEncoding ( +	byte sig[], byte b1, byte b2, byte b3, byte b4) +    { +	return (sig [0] == b1 && sig [1] == b2 +		&& sig [2] == b3 && sig [3] == b4); +    } + + +    /** +     * Check for a two-byte signature. +     * <p>Looks for a UCS-2 byte-order mark. +     * <p>Utility routine for detectEncoding (). +     * @param sig The first four bytes read. +     * @param b1 The first byte of the signature +     * @param b2 The second byte of the signature +     * @see #detectEncoding +     */ +    private static boolean tryEncoding (byte sig[], byte b1, byte b2) +    { +	return ((sig [0] == b1) && (sig [1] == b2)); +    } + + +    /** +     * This method pushes a string back onto input. +     * <p>It is useful either as the expansion of an internal entity,  +     * or for backtracking during the parse. +     * <p>Call pushCharArray () to do the actual work. +     * @param s The string to push back onto input. +     * @see #pushCharArray +     */ +    private void pushString (String ename, String s) +    throws SAXException +    { +	char ch[] = s.toCharArray (); +	pushCharArray (ename, ch, 0, ch.length); +    } + + +    /** +     * Push a new internal input source. 
+     * <p>This method is useful for expanding an internal entity, +     * or for unreading a string of characters.  It creates a new +     * readBuffer containing the characters in the array, instead +     * of characters converted from an input byte stream. +     * @param ch The char array to push. +     * @see #pushString +     * @see #pushURL +     * @see #readBuffer +     * @see #sourceType +     * @see #pushInput +     */ +    private void pushCharArray (String ename, char ch[], int start, int length) +    throws SAXException +    { +	// Push the existing status +	pushInput (ename); +	if (ename != null && doReport) { +	    dataBufferFlush (); +	    handler.startInternalEntity (ename); +	} +	sourceType = INPUT_INTERNAL; +	readBuffer = ch; +	readBufferPos = start; +	readBufferLength = length; +	readBufferOverflow = -1; +    } + + +    /** +     * Save the current input source onto the stack. +     * <p>This method saves all of the global variables associated with +     * the current input source, so that they can be restored when a new +     * input source has finished.  It also tests for entity recursion. +     * <p>The method saves the following global variables onto a stack +     * using a fixed-length array: +     * <ol> +     * <li>sourceType +     * <li>externalEntity +     * <li>readBuffer +     * <li>readBufferPos +     * <li>readBufferLength +     * <li>line +     * <li>encoding +     * </ol> +     * @param ename The name of the entity (if any) causing the new input. +     * @see #popInput +     * @see #sourceType +     * @see #externalEntity +     * @see #readBuffer +     * @see #readBufferPos +     * @see #readBufferLength +     * @see #line +     * @see #encoding +     */ +    private void pushInput (String ename) +    throws SAXException +    { +	// Check for entity recursion. 
+	if (ename != null) { +	    Enumeration entities = entityStack.elements (); +	    while (entities.hasMoreElements ()) { +		String e = (String) entities.nextElement (); +		if (e != null && e == ename) { +		    error ("recursive reference to entity", ename, null); +		} +	    } +	} +	entityStack.push (ename); + +	// Don't bother if there is no current input. +	if (sourceType == INPUT_NONE) { +	    return; +	} + +	// Set up a snapshot of the current +	// input source. +	Object input[] = new Object [12]; + +	input [0] = new Integer (sourceType); +	input [1] = externalEntity; +	input [2] = readBuffer; +	input [3] = new Integer (readBufferPos); +	input [4] = new Integer (readBufferLength); +	input [5] = new Integer (line); +	input [6] = new Integer (encoding); +	input [7] = new Integer (readBufferOverflow); +	input [8] = is; +	input [9] = new Integer (currentByteCount); +	input [10] = new Integer (column); +	input [11] = reader; + +	// Push it onto the stack. +	inputStack.push (input); +    } + + +    /** +     * Restore a previous input source. +     * <p>This method restores all of the global variables associated with +     * the current input source. +     * @exception java.io.EOFException +     *    If there are no more entries on the input stack. 
+     * @see #pushInput +     * @see #sourceType +     * @see #externalEntity +     * @see #readBuffer +     * @see #readBufferPos +     * @see #readBufferLength +     * @see #line +     * @see #encoding +     */ +    private void popInput () +    throws SAXException, IOException +    { +	String ename = (String) entityStack.pop (); + +	if (ename != null && doReport) +	    dataBufferFlush (); +	switch (sourceType) { +	case INPUT_STREAM: +	    handler.endExternalEntity (ename); +	    is.close (); +	    break; +	case INPUT_READER: +	    handler.endExternalEntity (ename); +	    reader.close (); +	    break; +	case INPUT_INTERNAL: +	    if (ename != null && doReport) +		handler.endInternalEntity (ename); +	    break; +	} + +	// Throw an EOFException if there +	// is nothing else to pop. +	if (inputStack.isEmpty ()) { +	    throw new EOFException ("no more input"); +	} + +	Object input [] = (Object[]) inputStack.pop (); + +	sourceType = ((Integer) input [0]).intValue (); +	externalEntity = (URLConnection) input [1]; +	readBuffer = (char[]) input [2]; +	readBufferPos = ((Integer) input [3]).intValue (); +	readBufferLength = ((Integer) input [4]).intValue (); +	line = ((Integer) input [5]).intValue (); +	encoding = ((Integer) input [6]).intValue (); +	readBufferOverflow = ((Integer) input [7]).intValue (); +	is = (InputStream) input [8]; +	currentByteCount = ((Integer) input [9]).intValue (); +	column = ((Integer) input [10]).intValue (); +	reader = (Reader) input [11]; +    } + + +    /** +     * Return true if we can read the expected character. +     * <p>Note that the character will be removed from the input stream +     * on success, but will be put back on failure.  Do not attempt to +     * read the character again if the method succeeds. +     * @param delim The character that should appear next.  For a +     *	      insensitive match, you must supply this in upper-case. +     * @return true if the character was successfully read, or false if +     *	 it was not. 
+     * @see #tryRead (String) +     */ +    private boolean tryRead (char delim) +    throws SAXException, IOException +    { +	char c; + +	// Read the character +	c = readCh (); + +	// Test for a match, and push the character +	// back if the match fails. +	if (c == delim) { +	    return true; +	} else { +	    unread (c); +	    return false; +	} +    } + + +    /** +     * Return true if we can read the expected string. +     * <p>This is simply a convenience method. +     * <p>Note that the string will be removed from the input stream +     * on success, but will be put back on failure.  Do not attempt to +     * read the string again if the method succeeds. +     * <p>This method will push back a character rather than an +     * array whenever possible (probably the majority of cases). +     * @param delim The string that should appear next. +     * @return true if the string was successfully read, or false if +     *	 it was not. +     * @see #tryRead (char) +     */ +    private boolean tryRead (String delim) +    throws SAXException, IOException +    { +	return tryRead (delim.toCharArray ()); +    } + +    private boolean tryRead (char ch []) +    throws SAXException, IOException +    { +	char c; + +	// Compare the input, character- +	// by character. + +	for (int i = 0; i < ch.length; i++) { +	    c = readCh (); +	    if (c != ch [i]) { +		unread (c); +		if (i != 0) { +		    unread (ch, i); +		} +		return false; +	    } +	} +	return true; +    } + + + +    /** +     * Return true if we can read some whitespace. +     * <p>This is simply a convenience method. +     * <p>This method will push back a character rather than an +     * array whenever possible (probably the majority of cases). +     * @return true if whitespace was found. 
+     */ +    private boolean tryWhitespace () +    throws SAXException, IOException +    { +	char c; +	c = readCh (); +	if (isWhitespace (c)) { +	    skipWhitespace (); +	    return true; +	} else { +	    unread (c); +	    return false; +	} +    } + + +    /** +     * Read all data until we find the specified string. +     * This is useful for scanning CDATA sections and PIs. +     * <p>This is inefficient right now, since it calls tryRead () +     * for every character. +     * @param delim The string delimiter +     * @see #tryRead (String, boolean) +     * @see #readCh +     */ +    private void parseUntil (String delim) +    throws SAXException, IOException +    { +	parseUntil (delim.toCharArray ()); +    } + +    private void parseUntil (char delim []) +    throws SAXException, IOException +    { +	char c; +	int startLine = line; + +	try { +	    while (!tryRead (delim)) { +		c = readCh (); +		dataBufferAppend (c); +	    } +	} catch (EOFException e) { +	    error ("end of input while looking for delimiter " +		+ "(started on line " + startLine +		+ ')', null, new String (delim)); +	} +    } + + +    ////////////////////////////////////////////////////////////////////// +    // Low-level I/O. +    ////////////////////////////////////////////////////////////////////// + + +    /** +     * Prefetch US-ASCII XML/text decl from input stream into read buffer. +     * Doesn't buffer more than absolutely needed, so that when an encoding +     * decl says we need to create an InputStreamReader, we can discard our +     * buffer and reset().  Caller knows the first chars of the decl exist +     * in the input stream. 
+     */ +    private void prefetchASCIIEncodingDecl () +    throws SAXException, IOException +    { +	int ch; +	readBufferPos = readBufferLength = 0; + +	is.mark (readBuffer.length); +	while (true) { +	    ch = is.read (); +	    readBuffer [readBufferLength++] = (char) ch; +	    switch (ch) { +	      case (int) '>': +		return; +	      case -1: +		error ("file ends before end of XML or encoding declaration.", +		       null, "?>"); +	    } +	    if (readBuffer.length == readBufferLength) +		error ("unfinished XML or encoding declaration"); +	} +    } + +    /** +     * Read a chunk of data from an external input source. +     * <p>This is simply a front-end that fills the rawReadBuffer +     * with bytes, then calls the appropriate encoding handler. +     * @see #encoding +     * @see #rawReadBuffer +     * @see #readBuffer +     * @see #filterCR +     * @see #copyUtf8ReadBuffer +     * @see #copyIso8859_1ReadBuffer +     * @see #copyUcs_2ReadBuffer +     * @see #copyUcs_4ReadBuffer +     */ +    private void readDataChunk () +    throws SAXException, IOException +    { +	int count; + +	// See if we have any overflow (filterCR sets for CR at end) +	if (readBufferOverflow > -1) { +	    readBuffer [0] = (char) readBufferOverflow; +	    readBufferOverflow = -1; +	    readBufferPos = 1; +	    sawCR = true; +	} else { +	    readBufferPos = 0; +	    sawCR = false; +	} + +	// input from a character stream. +	if (sourceType == INPUT_READER) { +	    count = reader.read (readBuffer, +			    readBufferPos, READ_BUFFER_MAX - readBufferPos); +	    if (count < 0) +		readBufferLength = readBufferPos; +	    else +		readBufferLength = readBufferPos + count; +	    if (readBufferLength > 0) +		filterCR (count >= 0); +	    sawCR = false; +	    return; +	} + +	// Read as many bytes as possible into the raw buffer. +	count = is.read (rawReadBuffer, 0, READ_BUFFER_MAX); + +	// Dispatch to an encoding-specific reader method to populate +	// the readBuffer.  
In most parser speed profiles, these routines +	// show up at the top of the CPU usage chart. +	if (count > 0) { +	    switch (encoding) { +	      // one byte builtins +	      case ENCODING_ASCII: +		copyIso8859_1ReadBuffer (count, (char) 0x0080); +		break; +	      case ENCODING_UTF_8: +		copyUtf8ReadBuffer (count); +		break; +	      case ENCODING_ISO_8859_1: +		copyIso8859_1ReadBuffer (count, (char) 0); +		break; + +	      // two byte builtins +	      case ENCODING_UCS_2_12: +		copyUcs2ReadBuffer (count, 8, 0); +		break; +	      case ENCODING_UCS_2_21: +		copyUcs2ReadBuffer (count, 0, 8); +		break; + +	      // four byte builtins +	      case ENCODING_UCS_4_1234: +		copyUcs4ReadBuffer (count, 24, 16, 8, 0); +		break; +	      case ENCODING_UCS_4_4321: +		copyUcs4ReadBuffer (count, 0, 8, 16, 24); +		break; +	      case ENCODING_UCS_4_2143: +		copyUcs4ReadBuffer (count, 16, 24, 0, 8); +		break; +	      case ENCODING_UCS_4_3412: +		copyUcs4ReadBuffer (count, 8, 0, 24, 16); +		break; +	    } +	} else +	    readBufferLength = readBufferPos; + +	readBufferPos = 0; + +	// Filter out all carriage returns if we've seen any +	// (including any saved from a previous read) +	if (sawCR) { +	    filterCR (count >= 0); +	    sawCR = false; + +	    // must actively report EOF, lest some CRs get lost. +	    if (readBufferLength == 0 && count >= 0) +		readDataChunk (); +	} + +	if (count > 0) +	    currentByteCount += count; +    } + + +    /** +     * Filter carriage returns in the read buffer. +     * CRLF becomes LF; CR becomes LF. 
+     * @param moreData true iff more data might come from the same source +     * @see #readDataChunk +     * @see #readBuffer +     * @see #readBufferOverflow +     */ +    private void filterCR (boolean moreData) +    { +	int i, j; + +	readBufferOverflow = -1; + +loop: +	for (i = j = readBufferPos; j < readBufferLength; i++, j++) { +	    switch (readBuffer [j]) { +	    case '\r': +		if (j == readBufferLength - 1) { +		    if (moreData) { +			readBufferOverflow = '\r'; +			readBufferLength--; +		    } else 	// CR at end of buffer +			readBuffer [i++] = '\n'; +		    break loop; +		} else if (readBuffer [j + 1] == '\n') { +		    j++; +		} +		readBuffer [i] = '\n'; +		break; + +	    case '\n': +	    default: +		readBuffer [i] = readBuffer [j]; +		break; +	    } +	} +	readBufferLength = i; +    } + +    /** +     * Convert a buffer of UTF-8-encoded bytes into UTF-16 characters. +     * <p>When readDataChunk () calls this method, the raw bytes are in  +     * rawReadBuffer, and the final characters will appear in  +     * readBuffer. +     * <p>Note that as of Unicode 3.1, good practice became a requirement, +     * so that each Unicode character has exactly one UTF-8 representation. +     * @param count The number of bytes to convert. +     * @see #readDataChunk +     * @see #rawReadBuffer +     * @see #readBuffer +     * @see #getNextUtf8Byte +     */ +    private void copyUtf8ReadBuffer (int count) +    throws SAXException, IOException +    { +	int	i = 0; +	int	j = readBufferPos; +	int	b1; +	char	c = 0; + +	/* +	// check once, so the runtime won't (if it's smart enough) +	if (count < 0 || count > rawReadBuffer.length) +	    throw new ArrayIndexOutOfBoundsException (Integer.toString (count)); +	*/ + +	while (i < count) { +	    b1 = rawReadBuffer [i++]; + +	    // Determine whether we are dealing +	    // with a one-, two-, three-, or four- +	    // byte sequence. 
+	    if (b1 < 0) { +		if ((b1 & 0xe0) == 0xc0) { +		    // 2-byte sequence: 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx +		    c = (char) (((b1 & 0x1f) << 6) +				| getNextUtf8Byte (i++, count)); +		    if (c < 0x0080) +			encodingError ("Illegal two byte UTF-8 sequence", +				c, 0); +		    //Sec 2.11 +		    // [1] the two-character sequence #xD #xA +		    // [2] the two-character sequence #xD #x85 +		    if ((c == 0x0085 || c == 0x000a) && sawCR) +		       	continue; +		     +		    // Sec 2.11 +		    // [3] the single character #x85 +		     +		    if(c == 0x0085  && xmlVersion == XML_11) +		    	readBuffer[j++] = '\r'; +		} else if ((b1 & 0xf0) == 0xe0) { +		    // 3-byte sequence: +		    // zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx +		    // most CJKV characters +		    c = (char) (((b1 & 0x0f) << 12) | +				   (getNextUtf8Byte (i++, count) << 6) | +				   getNextUtf8Byte (i++, count)); +                    //sec 2.11 +		    //[4] the single character #x2028 +		    if(c == 0x2028 && xmlVersion == XML_11){ +		       	readBuffer[j++] = '\r'; +		       	sawCR = true; +		       	continue; +		    } +		    if (c < 0x0800 || (c >= 0xd800 && c <= 0xdfff)) +			encodingError ("Illegal three byte UTF-8 sequence", +				c, 0); +		} else if ((b1 & 0xf8) == 0xf0) { +		    // 4-byte sequence: 11101110wwwwzzzzyy + 110111yyyyxxxxxx +		    //     = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx +		    // (uuuuu = wwww + 1) +		    // "Surrogate Pairs" ... 
from the "Astral Planes" +		    // Unicode 3.1 assigned the first characters there +		    int iso646 = b1 & 07; +		    iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count); +		    iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count); +		    iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count); + +		    if (iso646 <= 0xffff) { +			encodingError ("Illegal four byte UTF-8 sequence", +				iso646, 0); +		    } else { +			if (iso646 > 0x0010ffff) +			    encodingError ( +				"UTF-8 value out of range for Unicode", +				iso646, 0); +			iso646 -= 0x010000; +			readBuffer [j++] = (char) (0xd800 | (iso646 >> 10)); +			readBuffer [j++] = (char) (0xdc00 | (iso646 & 0x03ff)); +			continue; +		    } +		} else { +		    // The five and six byte encodings aren't supported; +		    // they exceed the Unicode (and XML) range. +		    encodingError ( +			    "unsupported five or six byte UTF-8 sequence", +			    0xff & b1, i); +		    // NOTREACHED +		    c = 0; +		} +	    } else { +		// 1-byte sequence: 000000000xxxxxxx = 0xxxxxxx +		// (US-ASCII character, "common" case, one branch to here) +		c = (char) b1; +	    } +	    readBuffer [j++] = c; +	    if (c == '\r') +		sawCR = true; +	} +	// How many characters have we read? +	readBufferLength = j; +    } + + +    /** +     * Return the next byte value in a UTF-8 sequence. +     * If it is not possible to get a byte from the current +     * entity, throw an exception. +     * @param pos The current position in the rawReadBuffer. +     * @param count The number of bytes in the rawReadBuffer +     * @return The significant six bits of a non-initial byte in +     *	 a UTF-8 sequence. +     * @exception EOFException If the sequence is incomplete. +     */ +    private int getNextUtf8Byte (int pos, int count) +    throws SAXException, IOException +    { +	int val; + +	// Take a character from the buffer +	// or from the actual input stream. 
+	if (pos < count) { +	    val = rawReadBuffer [pos]; +	} else { +	    val = is.read (); +	    if (val == -1) { +		encodingError ("unfinished multi-byte UTF-8 sequence at EOF", +			-1, pos); +	    } +	} + +	// Check for the correct bits at the start. +	if ((val & 0xc0) != 0x80) { +	    encodingError ("bad continuation of multi-byte UTF-8 sequence", +		    val, pos + 1); +	} + +	// Return the significant bits. +	return (val & 0x3f); +    } + + +    /** +     * Convert a buffer of US-ASCII or ISO-8859-1-encoded bytes into +     * UTF-16 characters. +     * +     * <p>When readDataChunk () calls this method, the raw bytes are in  +     * rawReadBuffer, and the final characters will appear in  +     * readBuffer. +     * +     * @param count The number of bytes to convert. +     * @param mask For ASCII conversion, 0x7f; else, 0xff. +     * @see #readDataChunk +     * @see #rawReadBuffer +     * @see #readBuffer +     */ +    private void copyIso8859_1ReadBuffer (int count, char mask) +    throws IOException +    { +	int i, j; +	for (i = 0, j = readBufferPos; i < count; i++, j++) { +	    char c = (char) (rawReadBuffer [i] & 0xff); +	    if ((c & mask) != 0) +		throw new CharConversionException ("non-ASCII character U+" +						    + Integer.toHexString (c)); +	    if (c == 0x0085 && xmlVersion == XML_11) +	       c = '\r';	 +	    readBuffer [j] = c; +	    if (c == '\r') { +		sawCR = true; +	    } +	} +	readBufferLength = j; +    } + + +    /** +     * Convert a buffer of UCS-2-encoded bytes into UTF-16 characters +     * (as used in Java string manipulation). +     * +     * <p>When readDataChunk () calls this method, the raw bytes are in  +     * rawReadBuffer, and the final characters will appear in  +     * readBuffer. +     * @param count The number of bytes to convert. +     * @param shift1 The number of bits to shift byte 1. 
+     * @param shift2 The number of bits to shift byte 2 +     * @see #readDataChunk +     * @see #rawReadBuffer +     * @see #readBuffer +     */ +    private void copyUcs2ReadBuffer (int count, int shift1, int shift2) +    throws SAXException +    { +	int j = readBufferPos; + +	if (count > 0 && (count % 2) != 0) { +	    encodingError ("odd number of bytes in UCS-2 encoding", -1, count); +	} +	// The loops are faster with less internal brancing; hence two +	if (shift1 == 0) {	// "UTF-16-LE" +	    for (int i = 0; i < count; i += 2) { +		char c = (char) (rawReadBuffer [i + 1] << 8); +		c |= 0xff & rawReadBuffer [i]; +		readBuffer [j++] = c; +		if (c == '\r') +		    sawCR = true; +	    } +	} else {	// "UTF-16-BE" +	    for (int i = 0; i < count; i += 2) { +		char c = (char) (rawReadBuffer [i] << 8); +		c |= 0xff & rawReadBuffer [i + 1]; +		readBuffer [j++] = c; +		if (c == '\r') +		    sawCR = true; +	    } +	} +	readBufferLength = j; +    } + + +    /** +     * Convert a buffer of UCS-4-encoded bytes into UTF-16 characters. +     * +     * <p>When readDataChunk () calls this method, the raw bytes are in  +     * rawReadBuffer, and the final characters will appear in  +     * readBuffer. +     * <p>Java has Unicode chars, and this routine uses surrogate pairs +     * for ISO-10646 values between 0x00010000 and 0x000fffff.  An +     * exception is thrown if the ISO-10646 character has no Unicode +     * representation. +     * +     * @param count The number of bytes to convert. +     * @param shift1 The number of bits to shift byte 1. 
+     * @param shift2 The number of bits to shift byte 2 +     * @param shift3 The number of bits to shift byte 2 +     * @param shift4 The number of bits to shift byte 2 +     * @see #readDataChunk +     * @see #rawReadBuffer +     * @see #readBuffer +     */ +    private void copyUcs4ReadBuffer (int count, int shift1, int shift2, +			      int shift3, int shift4) +    throws SAXException +    { +	int j = readBufferPos; + +	if (count > 0 && (count % 4) != 0) { +	    encodingError ( +		    "number of bytes in UCS-4 encoding not divisible by 4", +		    -1, count); +	} +	for (int i = 0; i < count; i += 4) { +	    int value = (((rawReadBuffer [i] & 0xff) << shift1) | +		      ((rawReadBuffer [i + 1] & 0xff) << shift2) | +		      ((rawReadBuffer [i + 2] & 0xff) << shift3) | +		      ((rawReadBuffer [i + 3] & 0xff) << shift4)); +	    if (value < 0x0000ffff) { +		readBuffer [j++] = (char) value; +		if (value == (int) '\r') { +		    sawCR = true; +		} +	    } else if (value < 0x0010ffff) { +		value -= 0x010000; +		readBuffer [j++] = (char) (0xd8 | ((value >> 10) & 0x03ff)); +		readBuffer [j++] = (char) (0xdc | (value & 0x03ff)); +	    } else { +		encodingError ("UCS-4 value out of range for Unicode", +			       value, i); +	    } +	} +	readBufferLength = j; +    } + + +    /** +     * Report a character encoding error. +     */ +    private void encodingError (String message, int value, int offset) +    throws SAXException +    { +	if (value != -1) +	    message = message + " (character code: 0x" + +		      Integer.toHexString (value) + ')'; +	error (message); +    } + + +    ////////////////////////////////////////////////////////////////////// +    // Local Variables. +    ////////////////////////////////////////////////////////////////////// + +    /** +     * Re-initialize the variables for each parse. 
+     */ +    private void initializeVariables () +    { +	// First line +	line = 1; +	column = 0; + +	// Set up the buffers for data and names +	dataBufferPos = 0; +	dataBuffer = new char [DATA_BUFFER_INITIAL]; +	nameBufferPos = 0; +	nameBuffer = new char [NAME_BUFFER_INITIAL]; + +	// Set up the DTD hash tables +	elementInfo = new Hashtable (); +	entityInfo = new Hashtable (); +	notationInfo = new Hashtable (); +	skippedPE = false; + +	// Set up the variables for the current +	// element context. +	currentElement = null; +	currentElementContent = CONTENT_UNDECLARED; + +	// Set up the input variables +	sourceType = INPUT_NONE; +	inputStack = new Stack (); +	entityStack = new Stack (); +	externalEntity = null; +	tagAttributePos = 0; +	tagAttributes = new String [100]; +	rawReadBuffer = new byte [READ_BUFFER_MAX]; +	readBufferOverflow = -1; + +	scratch = new InputSource (); + +	inLiteral = false; +	expandPE = false; +	peIsError = false; + +	doReport = false; + +	inCDATA = false; + +	symbolTable = new Object [SYMBOL_TABLE_LENGTH][]; +    } + + +    // +    // The current XML handler interface. +    // +    private SAXDriver	handler; + +    // +    // I/O information. +    // +    private Reader	reader; 	// current reader +    private InputStream	is; 		// current input stream +    private int		line; 		// current line number +    private int		column; 	// current column number +    private int		sourceType; 	// type of input source +    private Stack	inputStack; 	// stack of input soruces +    private URLConnection externalEntity; // current external entity +    private int		encoding; 	// current character encoding +    private int		currentByteCount; // bytes read from current source +    private InputSource	scratch;	// temporary + +    // +    // Buffers for decoded but unparsed character input. 
+    // +    private char	readBuffer []; +    private int		readBufferPos; +    private int		readBufferLength; +    private int		readBufferOverflow;  // overflow from last data chunk. + + +    // +    // Buffer for undecoded raw byte input. +    // +    private final static int READ_BUFFER_MAX = 16384; +    private byte	rawReadBuffer []; + + +    // +    // Buffer for attribute values, char refs, DTD stuff. +    // +    private static int DATA_BUFFER_INITIAL = 4096; +    private char	dataBuffer []; +    private int		dataBufferPos; + +    // +    // Buffer for parsed names. +    // +    private static int NAME_BUFFER_INITIAL = 1024; +    private char	nameBuffer []; +    private int		nameBufferPos; + +    // +    // Save any standalone flag +    // +    private boolean	docIsStandalone; + +    // +    // Hashtables for DTD information on elements, entities, and notations. +    // Populated until we start ignoring decls (because of skipping a PE) +    // +    private Hashtable	elementInfo; +    private Hashtable	entityInfo; +    private Hashtable	notationInfo; +    private boolean	skippedPE; + + +    // +    // Element type currently in force. +    // +    private String	currentElement; +    private int		currentElementContent; + +    // +    // Stack of entity names, to detect recursion. +    // +    private Stack	entityStack; + +    // +    // PE expansion is enabled in most chunks of the DTD, not all. +    // When it's enabled, literals are treated differently. +    // +    private boolean	inLiteral; +    private boolean	expandPE; +    private boolean	peIsError; + +    // +    // can't report entity expansion inside two constructs: +    // - attribute expansions (internal entities only) +    // - markup declarations (parameter entities only) +    // +    private boolean	doReport; + +    // +    // Symbol table, for caching interned names. 
+    // +    // These show up wherever XML names or nmtokens are used:  naming elements, +    // attributes, PIs, notations, entities, and enumerated attribute values. +    // +    // NOTE:  This hashtable doesn't grow.  The default size is intended to be +    // rather large for most documents.  Example:  one snapshot of the DocBook +    // XML 4.1 DTD used only about 350 such names.  As a rule, only pathological +    // documents (ones that don't reuse names) should ever see much collision. +    // +    // Be sure that SYMBOL_TABLE_LENGTH always stays prime, for best hashing. +    // "2039" keeps the hash table size at about two memory pages on typical +    // 32 bit hardware. +    // +    private final static int SYMBOL_TABLE_LENGTH = 2039; + +    private Object	symbolTable [][]; + +    // +    // Hash table of attributes found in current start tag. +    // +    private String	tagAttributes []; +    private int		tagAttributePos; + +    // +    // Utility flag: have we noticed a CR while reading the last +    // data chunk?  If so, we will have to go back and normalise +    // CR or CR/LF line ends. +    // +    private boolean	sawCR; + +    // +    // Utility flag: are we in CDATA?  If so, whitespace isn't ignorable. +    //  +    private boolean	inCDATA; +     +    // +    // Xml version. +    //   +    private static final int XML_10 = 0;  +    private static final int XML_11 = 1;  +    private int 	xmlVersion = XML_10; +} | 

