diff options
Diffstat (limited to 'libjava/gnu/xml/dom/html2/DomHTMLParser.java')
-rw-r--r-- | libjava/gnu/xml/dom/html2/DomHTMLParser.java | 266 |
1 files changed, 0 insertions, 266 deletions
diff --git a/libjava/gnu/xml/dom/html2/DomHTMLParser.java b/libjava/gnu/xml/dom/html2/DomHTMLParser.java deleted file mode 100644 index 7b445622509..00000000000 --- a/libjava/gnu/xml/dom/html2/DomHTMLParser.java +++ /dev/null @@ -1,266 +0,0 @@ -/* DomHTMLParser.java -- - Copyright (C) 2005 Free Software Foundation, Inc. - -This file is part of GNU Classpath. - -GNU Classpath is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU Classpath is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU Classpath; see the file COPYING. If not, write to the -Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301 USA. - -Linking this library statically or dynamically with other modules is -making a combined work based on this library. Thus, the terms and -conditions of the GNU General Public License cover the whole -combination. - -As a special exception, the copyright holders of this library give you -permission to link this library with independent modules to produce an -executable, regardless of the license terms of these independent -modules, and to copy and distribute the resulting executable under -terms of your choice, provided that you also meet, for each linked -independent module, the terms and conditions of the license of that -module. An independent module is a module which is not derived from -or based on this library. If you modify this library, you may extend -this exception to your version of the library, but you are not -obligated to do so. 
If you do not wish to do so, delete this
exception statement from your version. */


package gnu.xml.dom.html2;

import gnu.javax.swing.text.html.parser.support.Parser;

import java.io.IOException;
import java.io.Reader;

import java.util.Enumeration;
import java.util.Iterator;
import java.util.LinkedList;

import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.parser.DTD;
import javax.swing.text.html.parser.TagElement;

import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.html2.HTMLDocument;

/**
 * This parser reads HTML from the given stream and stores it into an
 * {@link HTMLDocument}. Each HTML tag becomes a {@link Node}.
 * The tag attributes become the node attributes. The text inside an
 * HTML tag is inserted as one or several text nodes. Nested
 * HTML tags are inserted as child nodes.
 *
 * In a strict tree structure, closing a tag means closing all
 * nested tags. To work around this, this parser closes the nested
 * tags and immediately reopens them (empty, with the same attributes)
 * after the closed tag.
 * In this way, <code>&lt;b&gt;&lt;i&gt;c&lt;/b&gt;d</code>
 * is parsed as
 * <code>&lt;b&gt;&lt;i&gt;c&lt;/i&gt;&lt;/b&gt;&lt;i&gt;d</code> .
 *
 * @author Audrius Meskauskas (AudriusA@Bioinformatics.org)
 */
public class DomHTMLParser
  extends gnu.javax.swing.text.html.parser.support.Parser
{
  /**
   * The target where the HTML document will be inserted. Only set while
   * {@link #parseDocument(Reader)} is running.
   */
  protected DomHTMLDocument document;

  /**
   * The subsequently created new nodes will be inserted as the
   * children of this cursor.
   */
  protected Node cursor;

  /**
   * Create parser using the given DTD.
   *
   * @param dtd the DTD (for example,
   * {@link gnu.javax.swing.text.html.parser.HTML_401F}).
   */
  public DomHTMLParser(DTD dtd)
  {
    super(dtd);
  }

  /**
   * Parse SGML insertion ( &lt;! ... &gt; ).
   * Currently just treats it as a comment: the buffer content is appended
   * to the current cursor as a comment node.
   *
   * @param strBuff the text of the markup declaration.
   * @return always <code>false</code>.
   * @throws java.io.IOException declared for overriding purposes; this
   * implementation does not perform any I/O itself.
   */
  public boolean parseMarkupDeclarations(StringBuffer strBuff)
    throws java.io.IOException
  {
    Node c = document.createComment(strBuff.toString());
    cursor.appendChild(c);
    return false;
  }

  /**
   * Read the document, present in the given stream, and
   * return the corresponding {@link HTMLDocument}.
   *
   * @param input a stream to read from.
   * @return a document, reflecting the structure of the provided HTML
   * text.
   *
   * @throws IOException if the reader throws one (rethrown unchanged), or
   * if any other exception occurs while parsing (wrapped, with the original
   * exception attached as the cause).
   */
  public HTMLDocument parseDocument(Reader input)
    throws IOException
  {
    try
      {
        DomHTMLDocument result = new DomHTMLDocument();
        document = result;
        cursor = result;

        parse(input);

        return result;
      }
    catch (IOException ex)
      {
        // Already the declared type: do not hide it behind another wrapper.
        throw ex;
      }
    catch (Exception ex)
      {
        // Keep the original exception reachable instead of printing it
        // to the standard error stream and dropping it.
        IOException wrapped = new IOException("Exception: " + ex.getMessage());
        wrapped.initCause(ex);
        throw wrapped;
      }
    finally
      {
        // Release the target on failure as well as on success.
        document = null;
      }
  }

  /**
   * Create a new element node carrying the attributes of the tag that is
   * currently being handled, including the default values that are stored
   * in the resolving parents of the attribute set.
   *
   * @param name the name of node, case insensitive.
   * @return the created node.
   */
  protected Node createNode(String name)
  {
    // Tag names are ASCII; a fixed locale avoids surprises such as the
    // Turkish dotless i.
    Node new_node =
      document.createElement(name.toLowerCase(java.util.Locale.ENGLISH));
    NamedNodeMap natts = new_node.getAttributes();

    // Walk from the explicitly specified attributes up through the
    // resolving parents. The enumeration must be obtained anew for every
    // level, otherwise the parents are visited without being read.
    for (AttributeSet hatts = getAttributes(); hatts != null;
         hatts = hatts.getResolveParent())
      {
        Enumeration names = hatts.getAttributeNames();
        while (names.hasMoreElements())
          {
            Object key = names.nextElement();

            // The parent link is followed by the outer loop; if the set
            // exposes it as an attribute, it is not an HTML attribute.
            if (key == AttributeSet.ResolveAttribute)
              continue;

            String attrName = key.toString();

            // A value set at a nearer level wins over a default.
            if (natts.getNamedItem(attrName) != null)
              continue;

            Object value = hatts.getAttribute(key);
            if (value == null)
              continue;

            Node attribute = document.createAttribute(attrName);
            attribute.setNodeValue(value.toString());
            natts.setNamedItem(attribute);
          }
      }

    return new_node;
  }

  /**
   * Handle comment by inserting the comment node.
   * @param text the comment text.
   */
  protected void handleComment(char[] text)
  {
    Node c = document.createComment(new String(text));
    cursor.appendChild(c);
  }

  /**
   * Handle the tag with no content. The created node is appended to the
   * cursor, but the cursor itself is not moved. The <code>#pcdata</code>
   * pseudo tag is ignored.
   * @param tag the tag to handle.
   */
  protected void handleEmptyTag(TagElement tag)
  {
    String name = tag.getHTMLTag().toString();

    if (name.equalsIgnoreCase("#pcdata"))
      return;

    Node c = createNode(name);
    cursor.appendChild(c);
  }

  /**
   * Close the given tag. Close and reopen all nested tags.
   * The reopened tags are empty copies (same name and attributes, no
   * content) of the elements that were still open inside the closed one.
   * An end tag that matches no open element is ignored.
   * @param tag the tag to close.
   */
  protected void handleEndTag(TagElement tag)
  {
    String name = tag.getHTMLTag().toString();

    // Walk up from the cursor to the nearest open element with the given
    // name, remembering (outermost first) the elements that are passed.
    LinkedList open = new LinkedList();
    Node close = cursor;
    while (close != null && !name.equalsIgnoreCase(close.getNodeName()))
      {
        if (close != document)
          open.addFirst(close);
        close = close.getParentNode();
      }

    // Stray end tag: nothing to close. Re-rooting at the document here
    // would try to attach a second copy of the root element to it.
    if (close == null)
      return;

    cursor = close.getParentNode();

    // Reopen the elements that were closed implicitly. The copies must be
    // shallow: a deep copy would duplicate the content parsed so far, and
    // the deep copy of an outer element would already contain the inner
    // ones that are appended next.
    Iterator iter = open.iterator();
    while (iter.hasNext())
      {
        Node item = (Node) iter.next();
        Node copy = item.cloneNode(false);
        cursor.appendChild(copy);
        cursor = copy;
      }
  }

  /**
   * Handle the start tag by inserting the HTML element and making it the
   * new cursor.
   * @param tag the tag to handle.
   */
  protected void handleStartTag(TagElement tag)
  {
    HTML.Tag h = tag.getHTMLTag();
    Node c = createNode(h.toString());
    cursor.appendChild(c);
    cursor = c;
  }

  /**
   * Handle text by inserting the text node.
   * @param text the text to insert.
   */
  protected void handleText(char[] text)
  {
    Node c = document.createTextNode(text, 0, text.length);
    cursor.appendChild(c);
  }
}