diff options
| author | mkoch <mkoch@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-04-19 12:19:44 +0000 |
|---|---|---|
| committer | mkoch <mkoch@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-04-19 12:19:44 +0000 |
| commit | c191f2786686921ed00e2c87a1d24a418233113e (patch) | |
| tree | a6d7de022c0f8b41819fd0f9b2f2d17634143cec /libjava/gnu/xml/dom/html2/DomHTMLParser.java | |
| parent | 7355f7077e6aa61a5478d29136be8865cf7a0b6b (diff) | |
| download | ppe42-gcc-c191f2786686921ed00e2c87a1d24a418233113e.tar.gz ppe42-gcc-c191f2786686921ed00e2c87a1d24a418233113e.zip | |
2005-04-19 Andrew John Hughes <gnu_andrew@member.fsf.org>
* gnu/xml/dom/html2/DomHTMLParser.java:
Changed 'enum' references to become 'enumeration'.
2005-04-19 Audrius Meskauskas <audriusa@bluewin.ch>
* gnu/xml/dom/html2/DomHTMLParser.java: New file.
2005-04-19 Andrew John Hughes <gnu_andrew@member.fsf.org>
* gnu/javax/swing/text/html/parser/htmlAttributeSet.java:
(getAttributeNames()): Replaced 'enum' with 'enumeration'.
* gnu/javax/swing/text/html/parser/htmlValidator.java:
(validateParameters(TagElement,htmlAttributeSet)):
Replaced 'enum' with 'enumeration'.
(validateAttribute(TagElement,htmlAttributeSet,Enumeration,Enumeration)):
Likewise.
2005-04-19 Audrius Meskauskas <audriusa@bluewin.ch>
* javax/swing/text/html/parser/Entity.java (getType): New method.
* javax/swing/text/html/parser/DocumentParser.java:
Inherit from javax.swing.text.html.parser.Parser.
2005-04-19 Chris Burdess <dog@gnu.org>
* gnu/xml/dom/html2/DomHTMLDocument.java: Fixed element creation and
check for HTML/XHTML namespace.
2005-04-19 Audrius Meskauskas <audriusa@bluewin.ch>
* javax/swing/text/html/parser/Parser.java,
javax/swing/text/html/parser/Entity.java:
Inheriting from DTDConstants.
* javax/swing/text/html/parser/AttributeList.java
(getValues): Changed return type.
* javax/swing/text/html/parser/DocumentParser
(parse): Adding the callback parameter that receives
the parsing events.
2005-04-19 Chris Burdess <dog@gnu.org>
* gnu/xml/dom/DomImpl.java,
gnu/xml/dom/html2/DomHTMLAnchorElement.java,
gnu/xml/dom/html2/DomHTMLDocument.java,
gnu/xml/dom/html2/DomHTMLElement.java,
gnu/xml/dom/html2/DomHTMLFormElement.java,
gnu/xml/dom/html2/DomHTMLFrameElement.java,
gnu/xml/dom/html2/DomHTMLIFrameElement.java,
gnu/xml/dom/html2/DomHTMLImpl.java,
gnu/xml/dom/html2/DomHTMLInputElement.java,
gnu/xml/dom/html2/DomHTMLObjectElement.java,
gnu/xml/dom/html2/DomHTMLOptionElement.java,
gnu/xml/dom/html2/DomHTMLSelectElement.java,
gnu/xml/dom/html2/DomHTMLTableCellElement.java,
gnu/xml/dom/html2/DomHTMLTableElement.java,
gnu/xml/dom/html2/DomHTMLTableRowElement.java,
gnu/xml/dom/html2/DomHTMLTableSectionElement.java,
gnu/xml/dom/html2/DomHTMLTextAreaElement.java: JAXP integration,
UI events, and tree utility functions.
2005-04-19 Michael Koch <konqueror@gmx.de>
* gnu/javax/swing/text/html/parser/HTML_401F.java,
gnu/javax/swing/text/html/parser/gnuDTD.java,
gnu/javax/swing/text/html/parser/models/node.java:
Reworked import statements.
2005-04-19 Audrius Meskauskas, Lithuania <AudriusA@Bioinformatics.org>
* javax/swing/text/html/HTMLDocument.java: New file.
2005-04-19 Michael Koch <konqueror@gmx.de>
* javax/swing/text/html/HTMLFrameHyperlinkEvent.java:
Reformatted.
* javax/swing/text/html/parser/AttributeList.java:
Fixed order of modifiers.
(AttributeList): Made final.
* javax/swing/text/html/parser/ContentModel.java:
Fixed html characters in javadocs.
* javax/swing/text/html/parser/DTD.java
(DTD): Don't implement java.io.Serializable directly.
(getElement): Simplified.
* javax/swing/text/html/parser/DTDConstants.java:
Reformatted file.
* javax/swing/text/html/parser/Element.java:
Fixed order of modifiers.
* javax/swing/text/html/parser/Parser.java:
Reformatted. Don't use fully-qualified class names.
* javax/swing/text/html/parser/ParserDelegator.java:
Fixed order of modifiers.
* javax/swing/text/rtf/RTFParser.java:
Re-ordered import statements.
* javax/swing/text/rtf/RTFScanner.java:
Removed unused import statement.
2005-04-19 Chris Burdess <dog@gnu.org>
* gnu/xml/dom/html2/DomHTMLButtonElement.java,
gnu/xml/dom/html2/DomHTMLCollection.java,
gnu/xml/dom/html2/DomHTMLDocument.java,
gnu/xml/dom/html2/DomHTMLElement.java: Extensions for new element
types.
* gnu/xml/dom/html2/DomHTMLDListElement.java,
gnu/xml/dom/html2/DomHTMLDirectoryElement.java,
gnu/xml/dom/html2/DomHTMLDivElement.java,
gnu/xml/dom/html2/DomHTMLFieldSetElement.java,
gnu/xml/dom/html2/DomHTMLFontElement.java,
gnu/xml/dom/html2/DomHTMLFormElement.java,
gnu/xml/dom/html2/DomHTMLFrameElement.java,
gnu/xml/dom/html2/DomHTMLFrameSetElement.java,
gnu/xml/dom/html2/DomHTMLHRElement.java,
gnu/xml/dom/html2/DomHTMLHeadElement.java,
gnu/xml/dom/html2/DomHTMLHeadingElement.java,
gnu/xml/dom/html2/DomHTMLHtmlElement.java,
gnu/xml/dom/html2/DomHTMLIFrameElement.java,
gnu/xml/dom/html2/DomHTMLImageElement.java,
gnu/xml/dom/html2/DomHTMLInputElement.java,
gnu/xml/dom/html2/DomHTMLIsIndexElement.java,
gnu/xml/dom/html2/DomHTMLLIElement.java,
gnu/xml/dom/html2/DomHTMLLabelElement.java,
gnu/xml/dom/html2/DomHTMLLegendElement.java,
gnu/xml/dom/html2/DomHTMLLinkElement.java,
gnu/xml/dom/html2/DomHTMLMapElement.java,
gnu/xml/dom/html2/DomHTMLMenuElement.java,
gnu/xml/dom/html2/DomHTMLMetaElement.java,
gnu/xml/dom/html2/DomHTMLModElement.java,
gnu/xml/dom/html2/DomHTMLOListElement.java,
gnu/xml/dom/html2/DomHTMLObjectElement.java,
gnu/xml/dom/html2/DomHTMLOptGroupElement.java,
gnu/xml/dom/html2/DomHTMLOptionElement.java,
gnu/xml/dom/html2/DomHTMLParagraphElement.java,
gnu/xml/dom/html2/DomHTMLParamElement.java,
gnu/xml/dom/html2/DomHTMLPreElement.java,
gnu/xml/dom/html2/DomHTMLQuoteElement.java,
gnu/xml/dom/html2/DomHTMLScriptElement.java,
gnu/xml/dom/html2/DomHTMLSelectElement.java,
gnu/xml/dom/html2/DomHTMLStyleElement.java,
gnu/xml/dom/html2/DomHTMLTableCaptionElement.java,
gnu/xml/dom/html2/DomHTMLTableCellElement.java,
gnu/xml/dom/html2/DomHTMLTableColElement.java,
gnu/xml/dom/html2/DomHTMLTableElement.java,
gnu/xml/dom/html2/DomHTMLTableRowElement.java,
gnu/xml/dom/html2/DomHTMLTableSectionElement.java,
gnu/xml/dom/html2/DomHTMLTextAreaElement.java,
gnu/xml/dom/html2/DomHTMLTitleElement.java,
gnu/xml/dom/html2/DomHTMLUListElement.java: New files.
2005-04-19 Audrius Meskauskas <audriusa@bluewin.ch>
* javax/swing/text/ChangedCharSetException.java,
javax/swing/text/html/HTMLEditorKit.java,
javax/swing/text/html/HTMLFrameHyperlinkEvent.java,
javax/swing/text/html/parser/AttributeList.java,
javax/swing/text/html/parser/ContentModel.java,
javax/swing/text/html/parser/DocumentParser.java,
javax/swing/text/html/parser/DTD.java,
javax/swing/text/html/parser/DTDConstants.java,
javax/swing/text/html/parser/Element.java,
javax/swing/text/html/parser/Entity.java,
javax/swing/text/html/parser/Parser.java,
javax/swing/text/html/parser/TagElement.java,
gnu/javax/swing/text/html/package.html,
gnu/javax/swing/text/html/parser/gnuDTD.java,
gnu/javax/swing/text/html/parser/HTML_401F.java,
gnu/javax/swing/text/html/parser/htmlAttributeSet.java,
gnu/javax/swing/text/html/parser/htmlValidator.java,
gnu/javax/swing/text/html/parser/package.html,
gnu/javax/swing/text/html/parser/models/list.java,
gnu/javax/swing/text/html/parser/models/node.java,
gnu/javax/swing/text/html/parser/models/noTagModel.java,
gnu/javax/swing/text/html/parser/models/package.html,
gnu/javax/swing/text/html/parser/models/PCDATAonly_model.java,
gnu/javax/swing/text/html/parser/models/TableRowContentModel.java,
gnu/javax/swing/text/html/parser/models/transformer.java,
gnu/javax/swing/text/html/parser/support/gnuStringIntMapper.java,
gnu/javax/swing/text/html/parser/support/package.html,
gnu/javax/swing/text/html/parser/support/parameterDefaulter.java,
gnu/javax/swing/text/html/parser/support/Parser.java,
gnu/javax/swing/text/html/parser/support/textPreProcessor.java,
gnu/javax/swing/text/html/parser/support/low/Buffer.java,
gnu/javax/swing/text/html/parser/support/low/Constants.java,
gnu/javax/swing/text/html/parser/support/low/Location.java,
gnu/javax/swing/text/html/parser/support/low/node.java,
gnu/javax/swing/text/html/parser/support/low/package.html,
gnu/javax/swing/text/html/parser/support/low/ParseException.java,
gnu/javax/swing/text/html/parser/support/low/pattern.java,
gnu/javax/swing/text/html/parser/support/low/Queue.java,
gnu/javax/swing/text/html/parser/support/low/ReaderTokenizer.java:
New files.
* javax/swing/text/html/HTML.java,
javax/swing/text/html/parser/ParserDelegator.java:
New files (replacing).
* javax/swing/text/html/package.html,
javax/swing/text/html/parser/package.html: Documenting the
packages.
2005-04-19 Chris Burdess <dog@gnu.org>
* gnu/xml/dom/html2/DomHTMLAnchorElement.java,
gnu/xml/dom/html2/DomHTMLAppletElement.java,
gnu/xml/dom/html2/DomHTMLAreaElement.java,
gnu/xml/dom/html2/DomHTMLBaseElement.java,
gnu/xml/dom/html2/DomHTMLBaseFontElement.java,
gnu/xml/dom/html2/DomHTMLBodyElement.java,
gnu/xml/dom/html2/DomHTMLBRElement.java,
gnu/xml/dom/html2/DomHTMLButtonElement.java,
gnu/xml/dom/html2/DomHTMLCollection.java,
gnu/xml/dom/html2/DomHTMLDocument.java,
gnu/xml/dom/html2/DomHTMLElement.java: New files.
2005-04-19 Michael Koch <konqueror@gmx.de>
* Makefile.am: Added new files.
* Makefile.in: Regenerated.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@98406 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libjava/gnu/xml/dom/html2/DomHTMLParser.java')
| -rw-r--r-- | libjava/gnu/xml/dom/html2/DomHTMLParser.java | 266 |
1 files changed, 266 insertions, 0 deletions
diff --git a/libjava/gnu/xml/dom/html2/DomHTMLParser.java b/libjava/gnu/xml/dom/html2/DomHTMLParser.java new file mode 100644 index 00000000000..f1b970e1264 --- /dev/null +++ b/libjava/gnu/xml/dom/html2/DomHTMLParser.java @@ -0,0 +1,266 @@ +/* DomHTMLParser.java -- + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. 
*/


package gnu.xml.dom.html2;

import gnu.javax.swing.text.html.parser.support.Parser;

import java.io.IOException;
import java.io.Reader;

import java.util.Enumeration;
import java.util.Iterator;
import java.util.LinkedList;

import javax.swing.text.AttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.parser.DTD;
import javax.swing.text.html.parser.TagElement;

import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.html2.HTMLDocument;

/**
 * This parser reads HTML from the given stream and stores into
 * {@link HTMLDocument}. The HTML tag becomes the {@link Node}.
 * The tag attributes become the node attributes. The text inside
 * HTML tag is inserted as one or several text nodes. The nested
 * HTML tags are inserted as child nodes.
 *
 * In the strict tree structure, closing the tag means closing all
 * nested tags. To work around this, this parser closes the nested
 * tags and immediately reopens them after the closed tag.
 * In this way, <code>&lt;b&gt;&lt;i&gt;c&lt;/b&gt;d</code>
 * is parsed as
 * <code>&lt;b&gt;&lt;i&gt;c&lt;/i&gt;&lt;/b&gt;&lt;i&gt;d</code> .
 *
 * @author Audrius Meskauskas (AudriusA@Bioinformatics.org)
 */
public class DomHTMLParser
  extends gnu.javax.swing.text.html.parser.support.Parser
{
  /**
   * The target where HTML document will be inserted.
   */
  protected DomHTMLDocument document;

  /**
   * The subsequently created new nodes will be inserted as the
   * children of this cursor.
   */
  protected Node cursor;

  /**
   * Create parser using the given DTD.
   *
   * @param dtd the DTD (for example,
   * {@link gnu.javax.swing.text.html.parser.HTML_401F}).
   */
  public DomHTMLParser(DTD dtd)
  {
    super(dtd);
  }

  /**
   * Parse SGML insertion ( &lt;! ... &gt; ).
   * Currently just treats it as comment: the buffer content is appended
   * to the current cursor as a comment node.
   *
   * @param strBuff the text of the markup declaration.
   * @return always <code>false</code>.
   * @throws java.io.IOException declared for the overriding contract;
   * this implementation does not throw it itself.
   */
  public boolean parseMarkupDeclarations(StringBuffer strBuff)
    throws java.io.IOException
  {
    Node c = document.createComment(strBuff.toString());
    cursor.appendChild(c);
    return false;
  }

  /**
   * Read the document, present in the given stream, and
   * return the corresponding {@link HTMLDocument}.
   *
   * @param input a stream to read from.
   * @return a document, reflecting the structure of the provided HTML
   * text.
   *
   * @throws IOException if the reader throws one, or if any other
   * exception occurs while parsing. The original exception is
   * available as the cause of the thrown exception.
   */
  public HTMLDocument parseDocument(Reader input)
    throws IOException
  {
    try
      {
        document = new DomHTMLDocument();

        cursor = document;

        parse(input);

        // Hand the result over and drop our own reference to it.
        DomHTMLDocument h = document;
        document = null;
        return h;
      }
    catch (Exception ex)
      {
        // Keep the historical message, but chain the original exception
        // instead of dumping its stack trace to the standard error
        // stream, so that the caller decides how to report it.
        IOException wrapped =
          new IOException("Exception: " + ex.getMessage());
        wrapped.initCause(ex);
        throw wrapped;
      }
  }

  /**
   * Create a new node. The attributes of the tag being parsed (as
   * returned by <code>getAttributes()</code>) are copied into the node,
   * followed by the attributes of each resolving parent set.
   *
   * @param name the name of node, case insensitive.
   * @return the created node.
   */
  protected Node createNode(String name)
  {
    // Use a fixed locale: the default locale may map ASCII letters
    // to non-ASCII ones (for example 'I' in the Turkish locale).
    Node new_node =
      document.createElement(name.toLowerCase(java.util.Locale.ENGLISH));
    AttributeSet hatts = getAttributes();
    NamedNodeMap natts = new_node.getAttributes();

    while (hatts != null)
      {
        // The enumeration must be obtained anew for every set in the
        // chain; a single enumeration is exhausted after the first set
        // and the parent sets would never be read.
        Enumeration enumeration = hatts.getAttributeNames();

        while (enumeration.hasMoreElements())
          {
            Object key = enumeration.nextElement();
            String attrName = key.toString();

            // The sets are visited from the most specific one to its
            // parents, so a value that is already present must not be
            // replaced by the value from a parent set.
            if (natts.getNamedItem(attrName) == null)
              {
                Node attribute = document.createAttribute(attrName);
                attribute.setNodeValue(hatts.getAttribute(key).toString());
                natts.setNamedItem(attribute);
              }
          }

        // The default values are stored in a parent node.
        hatts = hatts.getResolveParent();
      }

    return new_node;
  }

  /**
   * Handle comment by inserting the comment node.
   * @param text the comment text.
   */
  protected void handleComment(char[] text)
  {
    Node c = document.createComment(new String(text));
    cursor.appendChild(c);
  }

  /**
   * Handle the tag with no content. The created node is appended to
   * the cursor, but the cursor itself is not moved. The
   * <code>#pcdata</code> pseudo tag is ignored.
   *
   * @param tag the tag to handle.
   */
  protected void handleEmptyTag(TagElement tag)
  {
    String name = tag.getHTMLTag().toString();

    if (name.equalsIgnoreCase("#pcdata"))
      return;

    Node c = createNode(name);
    cursor.appendChild(c);
  }

  /**
   * Close the given tag. Close and reopen all nested tags.
   * The reopened tags are empty copies (same name and attributes) of
   * the tags that were still open inside the closed one; the content
   * they already hold stays in the original nodes.
   *
   * @param tag the tag to close.
   */
  protected void handleEndTag(TagElement tag)
  {
    String name = tag.getHTMLTag().toString();
    String nname = cursor.getNodeName();

    // Closing the current tag.
    if (nname != null && nname.equalsIgnoreCase(name))
      {
        cursor = cursor.getParentNode();
      }
    else
      {
        // Remember the opened nodes, outermost first.
        LinkedList open = new LinkedList();
        Node close = cursor;
        while (close != null && !close.getNodeName().equalsIgnoreCase(name))
          {
            if (close != document)
              open.addFirst(close);
            close = close.getParentNode();
          }

        // NOTE(review): when no open node matches the end tag, the
        // open nodes are reopened directly under the document, as
        // before - confirm that this is the intended recovery.
        if (close == null)
          cursor = document;
        else
          cursor = close.getParentNode();

        // Insert the copies of the opened nodes. The copies are
        // shallow: a deep copy would duplicate the content that was
        // already parsed into the original nodes.
        Iterator iter = open.iterator();
        while (iter.hasNext())
          {
            Node item = (Node) iter.next();
            Node copy = item.cloneNode(false);
            cursor.appendChild(copy);
            cursor = copy;
          }
      }
  }

  /**
   * Handle the start tag by inserting the HTML element. The inserted
   * element becomes the new cursor.
   *
   * @param tag the tag to handle.
   */
  protected void handleStartTag(TagElement tag)
  {
    HTML.Tag h = tag.getHTMLTag();
    Node c = createNode(h.toString());
    cursor.appendChild(c);
    cursor = c;
  }

  /**
   * Handle text by inserting the text node.
   * @param text the text to insert.
   */
  protected void handleText(char[] text)
  {
    Node c = document.createTextNode(text, 0, text.length);
    cursor.appendChild(c);
  }
}

