summaryrefslogtreecommitdiffstats
path: root/libjava/classpath/gnu/javax/swing/text/html/parser
diff options
context:
space:
mode:
author tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-01-09 19:58:05 +0000
committer tromey <tromey@138bc75d-0d04-0410-961f-82ee72b054a4> 2007-01-09 19:58:05 +0000
commit 65bf3316cf384588453604be6b4f0ed3751a8b0f (patch)
tree 996a5f57d4a68c53473382e45cb22f574cb3e4db /libjava/classpath/gnu/javax/swing/text/html/parser
parent 8fc56618a84446beccd45b80381cdfe0e94050df (diff)
download ppe42-gcc-65bf3316cf384588453604be6b4f0ed3751a8b0f.tar.gz
ppe42-gcc-65bf3316cf384588453604be6b4f0ed3751a8b0f.zip
Merged gcj-eclipse branch to trunk.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@120621 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libjava/classpath/gnu/javax/swing/text/html/parser')
-rw-r--r-- libjava/classpath/gnu/javax/swing/text/html/parser/GnuParserDelegator.java 3
-rw-r--r-- libjava/classpath/gnu/javax/swing/text/html/parser/HTML_401F.java 4
-rw-r--r-- libjava/classpath/gnu/javax/swing/text/html/parser/HTML_401Swing.java 91
-rw-r--r-- libjava/classpath/gnu/javax/swing/text/html/parser/htmlValidator.java 5
-rw-r--r-- libjava/classpath/gnu/javax/swing/text/html/parser/support/Parser.java 70
-rw-r--r-- libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Constants.java 11
-rw-r--r-- libjava/classpath/gnu/javax/swing/text/html/parser/support/textPreProcessor.java 38
7 files changed, 94 insertions, 128 deletions
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/GnuParserDelegator.java b/libjava/classpath/gnu/javax/swing/text/html/parser/GnuParserDelegator.java
index 841db667e84..273461a721e 100644
--- a/libjava/classpath/gnu/javax/swing/text/html/parser/GnuParserDelegator.java
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/GnuParserDelegator.java
@@ -43,6 +43,7 @@ import java.io.Reader;
import java.io.Serializable;
import javax.swing.text.BadLocationException;
+import javax.swing.text.SimpleAttributeSet;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;
import javax.swing.text.html.parser.DTD;
@@ -92,7 +93,7 @@ public class GnuParserDelegator extends ParserDelegator implements Serializable
protected final void handleStartTag(TagElement tag)
{
- htmlAttributeSet attributes = gnu.getAttributes();
+ SimpleAttributeSet attributes = gnu.getAttributes();
if (tag.fictional())
attributes.addAttribute(ParserCallback.IMPLIED, Boolean.TRUE);
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/HTML_401F.java b/libjava/classpath/gnu/javax/swing/text/html/parser/HTML_401F.java
index c3c347e36e3..1894b6a1ac6 100644
--- a/libjava/classpath/gnu/javax/swing/text/html/parser/HTML_401F.java
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/HTML_401F.java
@@ -2445,8 +2445,10 @@ public class HTML_401F
attr(VALUE, null, null, 0, IMPLIED)
}
);
+
+ // Headers in the paragraph are not allowed.
defElement(P, 0, false, true, new ContentModel( 0,
- new noTagModel(P), null),
+ new noTagModel(new String[] { P, H1, H2, H3, H4, H5, H6 }), null),
NONE
,
new String[] {
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/HTML_401Swing.java b/libjava/classpath/gnu/javax/swing/text/html/parser/HTML_401Swing.java
deleted file mode 100644
index 9c934f647de..00000000000
--- a/libjava/classpath/gnu/javax/swing/text/html/parser/HTML_401Swing.java
+++ /dev/null
@@ -1,91 +0,0 @@
-/* HTML_401Swing.java -- The HTML 4.01 DTD, adapted for HTML rendering in Swing
- Copyright (C) 2006 Free Software Foundation, Inc.
-
-This file is part of GNU Classpath.
-
-GNU Classpath is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU Classpath is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU Classpath; see the file COPYING. If not, write to the
-Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301 USA.
-
-Linking this library statically or dynamically with other modules is
-making a combined work based on this library. Thus, the terms and
-conditions of the GNU General Public License cover the whole
-combination.
-
-As a special exception, the copyright holders of this library give you
-permission to link this library with independent modules to produce an
-executable, regardless of the license terms of these independent
-modules, and to copy and distribute the resulting executable under
-terms of your choice, provided that you also meet, for each linked
-independent module, the terms and conditions of the license of that
-module. An independent module is a module which is not derived from
-or based on this library. If you modify this library, you may extend
-this exception to your version of the library, but you are not
-obligated to do so. If you do not wish to do so, delete this
-exception statement from your version. */
-
-
-package gnu.javax.swing.text.html.parser;
-
-import javax.swing.text.html.parser.DTD;
-
-/**
- * This class is necessary because the current implementation of the GNU
- * Classpath Swing requires always enclose the text into paragraphs.
- *
- * @author Audrius Meskauskas (AudriusA@Bioinformatics.org)
- */
-public class HTML_401Swing extends HTML_401F
-{
- /**
- * The singleton instance;
- */
- final static HTML_401Swing singleton = new HTML_401Swing();
-
- /**
- * Either takes the document (by name) from DTD table, or
- * creates a new instance and registers it in the tabe.
- * The document is registerd under name "-//W3C//DTD HTML 4.01 Frameset//EN".
- * @return The new or existing DTD for parsing HTML 4.01 Frameset.
- */
- public static DTD getInstance()
- {
- return singleton;
- }
-
- /**
- * Get elements that are allowed in the document body, at the zero level.
- * This list disallows the text at this level (the implied P tag will be
- * generated). It also disallows A, B, I, U, CITE and other similar
- * elements that have the plain text inside. They will also be placed
- * inside the generated implied P tags.
- */
- protected String[] getBodyElements()
- {
- return new String[] {
- APPLET, BASEFONT,
- BR, BUTTON,
- IFRAME, IMG,
- INPUT, LABEL, MAP, OBJECT,
- SCRIPT, SELECT,
- TEXTAREA,
- BLOCKQUOTE, CENTER, DEL, DIR,
- DIV, DL, FIELDSET, FORM, H1,
- H2, H3, H4, H5, H6,
- HR, INS, ISINDEX, MENU, NOFRAMES,
- NOSCRIPT, OL, P, PRE, TABLE,
- UL
- };
- }
-}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/htmlValidator.java b/libjava/classpath/gnu/javax/swing/text/html/parser/htmlValidator.java
index 4d287a67763..7507850e8f9 100644
--- a/libjava/classpath/gnu/javax/swing/text/html/parser/htmlValidator.java
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/htmlValidator.java
@@ -153,7 +153,7 @@ public abstract class htmlValidator
* Remove the given tag from the stack or (if found) from the list
* of the forcibly closed tags.
*/
- public void closeTag(TagElement tElement)
+ public boolean closeTag(TagElement tElement)
{
HTML.Tag tag = tElement.getHTMLTag();
hTag x;
@@ -191,11 +191,12 @@ public abstract class htmlValidator
}
stack.remove(x);
- return;
+ return true;
}
}
}
s_error("Closing unopened <" + tag + ">");
+ return false;
}
/**
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/Parser.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/Parser.java
index 92f9b27c5d9..3a407310e74 100644
--- a/libjava/classpath/gnu/javax/swing/text/html/parser/support/Parser.java
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/Parser.java
@@ -56,6 +56,7 @@ import java.util.TreeSet;
import java.util.Vector;
import javax.swing.text.ChangedCharSetException;
+import javax.swing.text.SimpleAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.parser.AttributeList;
import javax.swing.text.html.parser.DTD;
@@ -250,9 +251,9 @@ public class Parser
* Get the attributes of the current tag.
* @return The attribute set, representing the attributes of the current tag.
*/
- public htmlAttributeSet getAttributes()
+ public SimpleAttributeSet getAttributes()
{
- return attributes;
+ return new SimpleAttributeSet(attributes);
}
/**
@@ -497,6 +498,9 @@ public class Parser
mustBe(t.kind);
}
hTag = new Token(start, last);
+
+ // Consume any whitespace immediately following a comment.
+ optional(WS);
handleComment();
}
@@ -579,6 +583,8 @@ public class Parser
);
}
}
+ // Consume any whitespace that follows the Sgml insertion.
+ optional(WS);
}
/**
@@ -658,7 +664,10 @@ public class Parser
else
text = textProcessor.preprocess(buffer);
- if (text != null && text.length > 0)
+ if (text != null && text.length > 0
+ // According to the specs we need to discard whitespace immediately
+ // before a closing tag.
+ && (text.length > 1 || text[0] != ' ' || ! TAG_CLOSE.matches(this)))
{
TagElement pcdata = new TagElement(dtd.getElement("#pcdata"));
attributes = htmlAttributeSet.EMPTY_HTML_ATTRIBUTE_SET;
@@ -889,6 +898,8 @@ public class Parser
protected void parseDocument()
throws ParseException
{
+ // Read up any initial whitespace.
+ optional(WS);
while (getTokenAhead().kind != EOF)
{
advanced = false;
@@ -979,13 +990,15 @@ public class Parser
+ next.getImage() + "'");
attrValue = value.getImage();
}
- else if (next.kind == SLASH)
- // The slash in this context is treated as the ordinary
- // character, not as a token. The slash may be part of
+ else if (next.kind == SLASH || next.kind == OTHER)
+ // The slash and other characters (like %) in this context are
+ // treated as ordinary
+ // characters, not as tokens. The character may be part of
// the unquoted URL.
{
StringBuffer image = new StringBuffer(value.getImage());
- while (next.kind == NUMTOKEN || next.kind == SLASH)
+ while (next.kind == NUMTOKEN || next.kind == SLASH
+ || next.kind == OTHER)
{
image.append(getNextToken().getImage());
next = getTokenAhead();
@@ -1177,6 +1190,13 @@ public class Parser
{
validator.validateTag(tag, attributes);
handleEmptyTag(tag);
+ HTML.Tag h = tag.getHTMLTag();
+ // When a block tag is closed, consume whitespace that follows after
+ // it.
+ // For some unknown reason a FRAME tag is not treated as a block element.
+ // However in this case it should be treated as such.
+ if (isBlock(h))
+ optional(WS);
}
catch (ChangedCharSetException ex)
{
@@ -1192,8 +1212,8 @@ public class Parser
*/
private void _handleEndTag(TagElement tag)
{
- validator.closeTag(tag);
- _handleEndTag_remaining(tag);
+ if (validator.closeTag(tag))
+ _handleEndTag_remaining(tag);
}
/**
@@ -1213,6 +1233,11 @@ public class Parser
if (preformatted < 0)
preformatted = 0;
+ // When a block tag is closed, consume whitespace that follows after
+ // it.
+ if (isBlock(h))
+ optional(WS);
+
if (h == HTML.Tag.TITLE)
{
titleOpen = false;
@@ -1239,6 +1264,9 @@ public class Parser
HTML.Tag h = tag.getHTMLTag();
+ if (isBlock(h))
+ optional(WS);
+
if (h.isPreformatted())
preformatted++;
@@ -1418,8 +1446,6 @@ public class Parser
hTag = new Token(start, next);
- attributes.setResolveParent(defaulter.getDefaultParameters(name.getImage()));
-
if (!end)
{
// The tag body contains errors. If additionally the tag
@@ -1457,7 +1483,12 @@ public class Parser
if (te.getElement().type == DTDConstants.EMPTY)
_handleEmptyTag(te);
else
- _handleStartTag(te);
+ {
+ // According to the specs we need to consume whitespace following
+ // immediately after an opening tag.
+ optional(WS);
+ _handleStartTag(te);
+ }
}
}
@@ -1483,4 +1514,19 @@ public class Parser
{
error("Whitespace here is not permitted");
}
+
+ /**
+ * Returns true when the specified tag should be considered a block tag
+ * wrt whitespace handling. We need this special handling, since there
+ * are a couple of tags that we must treat as block tags but which aren't
+ * officially block tags.
+ *
+ * @param tag the tag to check
+ * @return true when the specified tag should be considered a block tag
+ * wrt whitespace handling
+ */
+ private boolean isBlock(HTML.Tag tag)
+ {
+ return tag.isBlock() || tag == HTML.Tag.STYLE || tag == HTML.Tag.FRAME;
+ }
}
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Constants.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Constants.java
index 283d32385ef..5416582adca 100644
--- a/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Constants.java
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/low/Constants.java
@@ -209,6 +209,17 @@ public class Constants
}
);
+ /**
+ * Ordinary HTML tag closing pattern.
+ */
+ public static final pattern TAG_CLOSE =
+ new pattern(new node[]
+ {
+ new node(BEGIN), new node(WS, true), new node(SLASH),
+ new node(WS, true), new node(NUMTOKEN)
+ }
+ );
+
/* Special tokens */
/**
diff --git a/libjava/classpath/gnu/javax/swing/text/html/parser/support/textPreProcessor.java b/libjava/classpath/gnu/javax/swing/text/html/parser/support/textPreProcessor.java
index cc1610585a6..6fd79e2589e 100644
--- a/libjava/classpath/gnu/javax/swing/text/html/parser/support/textPreProcessor.java
+++ b/libjava/classpath/gnu/javax/swing/text/html/parser/support/textPreProcessor.java
@@ -42,17 +42,17 @@ import gnu.javax.swing.text.html.parser.support.low.Constants;
/**
* Pre - processes text in text parts of the html document.
- * Not thread - safe.
+ *
* @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
*/
public class textPreProcessor
{
/**
- * Pre - process non-preformatted text.
- * \t, \r and \n mutate into spaces, then multiple spaces mutate
- * into single one, all whitespace around tags is consumed.
- * The content of the passed buffer is destroyed.
- * @param text A text to pre-process.
+ * Pre - process non-preformatted text. \t, \r and \n mutate into spaces, then
+ * multiple spaces mutate into single one, all whitespace around tags is
+ * consumed. The content of the passed buffer is destroyed.
+ *
+ * @param a_text A text to pre-process.
*/
public char[] preprocess(StringBuffer a_text)
{
@@ -64,18 +64,15 @@ public class textPreProcessor
int a = 0;
int b = text.length - 1;
- try
- {
- while (Constants.bWHITESPACE.get(text [ a ]))
- a++;
- while (Constants.bWHITESPACE.get(text [ b ]))
- b--;
- }
- catch (ArrayIndexOutOfBoundsException sx)
- {
- // A text fragment, consisting from line breaks only.
- return null;
- }
+ // Remove leading/trailing whitespace, leaving at most one whitespace character at each end
+ int len = text.length;
+ while (a + 1 < len && Constants.bWHITESPACE.get(text[a])
+ && Constants.bWHITESPACE.get(text[a + 1]))
+ a++;
+
+ while (b > a && Constants.bWHITESPACE.get(text[b])
+ && Constants.bWHITESPACE.get(text[b - 1]))
+ b--;
a_text.setLength(0);
@@ -83,10 +80,9 @@ public class textPreProcessor
boolean spaceNow;
char c;
- chars:
- for (int i = a; i <= b; i++)
+ chars: for (int i = a; i <= b; i++)
{
- c = text [ i ];
+ c = text[i];
spaceNow = Constants.bWHITESPACE.get(c);
if (spacesWere && spaceNow)
continue chars;
OpenPOWER on IntegriCloud