Diffstat (limited to 'import-layers/yocto-poky/bitbake/lib/bs4/builder')
-rw-r--r--  import-layers/yocto-poky/bitbake/lib/bs4/builder/__init__.py    |  7
-rw-r--r--  import-layers/yocto-poky/bitbake/lib/bs4/builder/_html5lib.py   | 71
-rw-r--r--  import-layers/yocto-poky/bitbake/lib/bs4/builder/_htmlparser.py | 56
-rw-r--r--  import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py       | 47
4 files changed, 125 insertions(+), 56 deletions(-)
diff --git a/import-layers/yocto-poky/bitbake/lib/bs4/builder/__init__.py b/import-layers/yocto-poky/bitbake/lib/bs4/builder/__init__.py
index 740f5f29c..6ccd4d23d 100644
--- a/import-layers/yocto-poky/bitbake/lib/bs4/builder/__init__.py
+++ b/import-layers/yocto-poky/bitbake/lib/bs4/builder/__init__.py
@@ -80,9 +80,12 @@ builder_registry = TreeBuilderRegistry()
class TreeBuilder(object):
"""Turn a document into a Beautiful Soup object tree."""
+ NAME = "[Unknown tree builder]"
+ ALTERNATE_NAMES = []
features = []
is_xml = False
+ picklable = False
preserve_whitespace_tags = set()
empty_element_tags = None # A tag will be considered an empty-element
# tag when and only when it has no contents.
@@ -153,13 +156,13 @@ class TreeBuilder(object):
universal = self.cdata_list_attributes.get('*', [])
tag_specific = self.cdata_list_attributes.get(
tag_name.lower(), None)
- for attr in attrs.keys():
+ for attr in list(attrs.keys()):
if attr in universal or (tag_specific and attr in tag_specific):
# We have a "class"-type attribute whose string
# value is a whitespace-separated list of
# values. Split it into a list.
value = attrs[attr]
- if isinstance(value, basestring):
+ if isinstance(value, str):
values = whitespace_re.split(value)
else:
# html5lib sometimes calls setAttributes twice
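The hunk above ports _replace_cdata_list_attribute_values to Python 3: list(attrs.keys()) lets the dict be updated safely while iterating, and str replaces basestring. The end-user effect of the splitting it performs is visible through the public API; a minimal sketch, assuming a bs4 build like this one with the stock html.parser builder:

    from bs4 import BeautifulSoup

    # "class" is registered as a multi-valued attribute, so its
    # whitespace-separated string becomes a list at parse time;
    # "id" is not, so it stays a plain string.
    soup = BeautifulSoup('<p id="p1" class="lead intro">hi</p>', "html.parser")
    print(soup.p["class"])  # ['lead', 'intro']
    print(soup.p["id"])     # 'p1'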
diff --git a/import-layers/yocto-poky/bitbake/lib/bs4/builder/_html5lib.py b/import-layers/yocto-poky/bitbake/lib/bs4/builder/_html5lib.py
index 7de36ae75..f0e5924eb 100644
--- a/import-layers/yocto-poky/bitbake/lib/bs4/builder/_html5lib.py
+++ b/import-layers/yocto-poky/bitbake/lib/bs4/builder/_html5lib.py
@@ -2,6 +2,7 @@ __all__ = [
'HTML5TreeBuilder',
]
+from pdb import set_trace
import warnings
from bs4.builder import (
PERMISSIVE,
@@ -9,7 +10,10 @@ from bs4.builder import (
HTML_5,
HTMLTreeBuilder,
)
-from bs4.element import NamespacedAttribute
+from bs4.element import (
+ NamespacedAttribute,
+ whitespace_re,
+)
import html5lib
from html5lib.constants import namespaces
from bs4.element import (
@@ -22,11 +26,20 @@ from bs4.element import (
class HTML5TreeBuilder(HTMLTreeBuilder):
"""Use html5lib to build a tree."""
- features = ['html5lib', PERMISSIVE, HTML_5, HTML]
+ NAME = "html5lib"
+
+ features = [NAME, PERMISSIVE, HTML_5, HTML]
- def prepare_markup(self, markup, user_specified_encoding):
+ def prepare_markup(self, markup, user_specified_encoding,
+ document_declared_encoding=None, exclude_encodings=None):
# Store the user-specified encoding for use later on.
self.user_specified_encoding = user_specified_encoding
+
+ # document_declared_encoding and exclude_encodings aren't used
+ # ATM because the html5lib TreeBuilder doesn't use
+ # UnicodeDammit.
+ if exclude_encodings:
+ warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
yield (markup, None, None, False)
# These methods are defined by Beautiful Soup.
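The new exclude_encodings parameter is accepted for signature compatibility but ignored here, since this builder never routes markup through UnicodeDammit; passing it only produces the warning above. A hedged sketch of how that surfaces through the public constructor, assuming html5lib is installed:

    import warnings
    from bs4 import BeautifulSoup

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        BeautifulSoup("<p>hi</p>", "html5lib", exclude_encodings=["latin-1"])

    # The recorded warning says the html5lib builder ignores exclude_encodings.
    print(caught[-1].message)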
@@ -37,7 +50,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
doc = parser.parse(markup, encoding=self.user_specified_encoding)
# Set the character encoding detected by the tokenizer.
- if isinstance(markup, unicode):
+ if isinstance(markup, str):
# We need to special-case this because html5lib sets
# charEncoding to UTF-8 if it gets Unicode input.
doc.original_encoding = None
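Clearing original_encoding for str input above avoids reporting html5lib's default of UTF-8 when no byte-level detection ever happened. Roughly, assuming html5lib is installed:

    from bs4 import BeautifulSoup

    # Unicode in: nothing was detected, so nothing is reported.
    print(BeautifulSoup("<p>caf\u00e9</p>", "html5lib").original_encoding)   # None

    # Bytes in: the tokenizer's detected encoding is kept (typically 'utf-8' here).
    print(BeautifulSoup("<p>caf\u00e9</p>".encode("utf-8"), "html5lib").original_encoding)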
@@ -51,7 +64,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
- return u'<html><head></head><body>%s</body></html>' % fragment
+ return '<html><head></head><body>%s</body></html>' % fragment
class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
@@ -101,7 +114,16 @@ class AttrList(object):
def __iter__(self):
return list(self.attrs.items()).__iter__()
def __setitem__(self, name, value):
- "set attr", name, value
+ # If this attribute is a multi-valued attribute for this element,
+ # turn its value into a list.
+ list_attr = HTML5TreeBuilder.cdata_list_attributes
+ if (name in list_attr['*']
+ or (self.element.name in list_attr
+ and name in list_attr[self.element.name])):
+ # A node that is being cloned may have already undergone
+ # this procedure.
+ if not isinstance(value, list):
+ value = whitespace_re.split(value)
self.element[name] = value
def items(self):
return list(self.attrs.items())
@@ -124,7 +146,7 @@ class Element(html5lib.treebuilders._base.Node):
def appendChild(self, node):
string_child = child = None
- if isinstance(node, basestring):
+ if isinstance(node, str):
# Some other piece of code decided to pass in a string
# instead of creating a TextElement object to contain the
# string.
@@ -139,7 +161,7 @@ class Element(html5lib.treebuilders._base.Node):
else:
child = node.element
- if not isinstance(child, basestring) and child.parent is not None:
+ if not isinstance(child, str) and child.parent is not None:
node.element.extract()
if (string_child and self.element.contents
@@ -152,7 +174,7 @@ class Element(html5lib.treebuilders._base.Node):
old_element.replace_with(new_element)
self.soup._most_recent_element = new_element
else:
- if isinstance(node, basestring):
+ if isinstance(node, str):
# Create a brand new NavigableString from this string.
child = self.soup.new_string(node)
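appendChild now accepts a plain str as well as a node object, wrapping bare strings via soup.new_string(). That factory is part of the public API too; a minimal sketch with the stock parser:

    from bs4 import BeautifulSoup
    from bs4.element import NavigableString

    soup = BeautifulSoup("<p></p>", "html.parser")
    s = soup.new_string("hello")           # a brand-new NavigableString
    print(isinstance(s, NavigableString))  # True
    soup.p.append(s)
    print(soup.p)                          # <p>hello</p>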
@@ -161,6 +183,12 @@ class Element(html5lib.treebuilders._base.Node):
# immediately after the parent, if it has no children.)
if self.element.contents:
most_recent_element = self.element._last_descendant(False)
+ elif self.element.next_element is not None:
+ # Something from further ahead in the parse tree is
+ # being inserted into this earlier element. This is
+ # very annoying because it means an expensive search
+ # for the last element in the tree.
+ most_recent_element = self.soup._last_descendant()
else:
most_recent_element = self.element
@@ -172,6 +200,7 @@ class Element(html5lib.treebuilders._base.Node):
return AttrList(self.element)
def setAttributes(self, attributes):
+
if attributes is not None and len(attributes) > 0:
converted_attributes = []
@@ -183,7 +212,7 @@ class Element(html5lib.treebuilders._base.Node):
self.soup.builder._replace_cdata_list_attribute_values(
self.name, attributes)
- for name, value in attributes.items():
+ for name, value in list(attributes.items()):
self.element[name] = value
# The attributes may contain variables that need substitution.
@@ -218,6 +247,9 @@ class Element(html5lib.treebuilders._base.Node):
def reparentChildren(self, new_parent):
"""Move all of this tag's children into another tag."""
+ # print "MOVE", self.element.contents
+ # print "FROM", self.element
+ # print "TO", new_parent.element
element = self.element
new_parent_element = new_parent.element
# Determine what this tag's next_element will be once all the children
@@ -236,17 +268,28 @@ class Element(html5lib.treebuilders._base.Node):
new_parents_last_descendant_next_element = new_parent_element.next_element
to_append = element.contents
- append_after = new_parent.element.contents
+ append_after = new_parent_element.contents
if len(to_append) > 0:
# Set the first child's previous_element and previous_sibling
# to elements within the new parent
first_child = to_append[0]
- first_child.previous_element = new_parents_last_descendant
+ if new_parents_last_descendant:
+ first_child.previous_element = new_parents_last_descendant
+ else:
+ first_child.previous_element = new_parent_element
first_child.previous_sibling = new_parents_last_child
+ if new_parents_last_descendant:
+ new_parents_last_descendant.next_element = first_child
+ else:
+ new_parent_element.next_element = first_child
+ if new_parents_last_child:
+ new_parents_last_child.next_sibling = first_child
# Fix the last child's next_element and next_sibling
last_child = to_append[-1]
last_child.next_element = new_parents_last_descendant_next_element
+ if new_parents_last_descendant_next_element:
+ new_parents_last_descendant_next_element.previous_element = last_child
last_child.next_sibling = None
for child in to_append:
@@ -257,6 +300,10 @@ class Element(html5lib.treebuilders._base.Node):
element.contents = []
element.next_element = final_next_element
+ # print "DONE WITH MOVE"
+ # print "FROM", self.element
+ # print "TO", new_parent_element
+
def cloneNode(self):
tag = self.soup.new_tag(self.element.name, self.namespace)
node = Element(tag, self.soup, self.namespace)
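The reparentChildren() changes above move every child of one element into another and then repair the next_element/previous_element chain on both ends, including the case where the new parent already has descendants. At the public-API level the same reparenting can be written with extract() and append(); a rough equivalent, not the internal code path html5lib exercises:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<div><p>a</p><p>b</p></div><section></section>", "html.parser")
    div, section = soup.div, soup.section

    # Move all of div's children into section, preserving order.
    for child in list(div.contents):
        section.append(child.extract())

    print(soup)  # <div></div><section><p>a</p><p>b</p></section>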
diff --git a/import-layers/yocto-poky/bitbake/lib/bs4/builder/_htmlparser.py b/import-layers/yocto-poky/bitbake/lib/bs4/builder/_htmlparser.py
index ca8d8b892..bb0a63f2f 100644
--- a/import-layers/yocto-poky/bitbake/lib/bs4/builder/_htmlparser.py
+++ b/import-layers/yocto-poky/bitbake/lib/bs4/builder/_htmlparser.py
@@ -4,10 +4,16 @@ __all__ = [
'HTMLParserTreeBuilder',
]
-from HTMLParser import (
- HTMLParser,
- HTMLParseError,
- )
+from html.parser import HTMLParser
+
+try:
+ from html.parser import HTMLParseError
+except ImportError as e:
+ # HTMLParseError is removed in Python 3.5. Since it can never be
+ # thrown in 3.5, we can just define our own class as a placeholder.
+ class HTMLParseError(Exception):
+ pass
+
import sys
import warnings
@@ -19,10 +25,10 @@ import warnings
# At the end of this file, we monkeypatch HTMLParser so that
# strict=True works well on Python 3.2.2.
major, minor, release = sys.version_info[:3]
-CONSTRUCTOR_TAKES_STRICT = (
- major > 3
- or (major == 3 and minor > 2)
- or (major == 3 and minor == 2 and release >= 3))
+CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
+CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
+CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
+
from bs4.element import (
CData,
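The rewritten constants distinguish three regimes instead of lumping all of Python 3 together: strict is only passed on 3.2.3+, skipped once it is deprecated in 3.3 (and gone in 3.5), and convert_charrefs, new in 3.4, is pinned to False so character references still reach the handlers unconverted. A standalone sketch of the same feature-gating pattern (the kwargs name is illustrative, not the module's):

    import sys
    from html.parser import HTMLParser

    kwargs = {}
    if sys.version_info[:2] >= (3, 4):
        # Keep &#...;-style references as separate events instead of
        # letting HTMLParser convert them behind our back.
        kwargs["convert_charrefs"] = False

    parser = HTMLParser(**kwargs)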
@@ -63,7 +69,8 @@ class BeautifulSoupHTMLParser(HTMLParser):
def handle_charref(self, name):
# XXX workaround for a bug in HTMLParser. Remove this once
- # it's fixed.
+ # it's fixed in all supported versions.
+ # http://bugs.python.org/issue13633
if name.startswith('x'):
real_name = int(name.lstrip('x'), 16)
elif name.startswith('X'):
@@ -72,9 +79,9 @@ class BeautifulSoupHTMLParser(HTMLParser):
real_name = int(name)
try:
- data = unichr(real_name)
- except (ValueError, OverflowError), e:
- data = u"\N{REPLACEMENT CHARACTER}"
+ data = chr(real_name)
+ except (ValueError, OverflowError) as e:
+ data = "\N{REPLACEMENT CHARACTER}"
self.handle_data(data)
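The workaround still converts &#x...;/&#...; references by hand (see http://bugs.python.org/issue13633); only the Python 3 spellings chr() and "except ... as e" change. The conversion logic, extracted as a standalone sketch (charref_to_text is an illustrative name, not part of bs4):

    def charref_to_text(name):
        # name is what follows '&#', e.g. 'x201C' (hex) or '8220' (decimal).
        if name.lower().startswith("x"):
            codepoint = int(name[1:], 16)
        else:
            codepoint = int(name)
        try:
            return chr(codepoint)
        except (ValueError, OverflowError):
            # Out-of-range references degrade to U+FFFD.
            return "\N{REPLACEMENT CHARACTER}"

    print(charref_to_text("x201C"))  # '“' (hex form)
    print(charref_to_text("8220"))   # the same character, decimal form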
@@ -113,14 +120,6 @@ class BeautifulSoupHTMLParser(HTMLParser):
def handle_pi(self, data):
self.soup.endData()
- if data.endswith("?") and data.lower().startswith("xml"):
- # "An XHTML processing instruction using the trailing '?'
- # will cause the '?' to be included in data." - HTMLParser
- # docs.
- #
- # Strip the question mark so we don't end up with two
- # question marks.
- data = data[:-1]
self.soup.handle_data(data)
self.soup.endData(ProcessingInstruction)
@@ -128,26 +127,31 @@ class BeautifulSoupHTMLParser(HTMLParser):
class HTMLParserTreeBuilder(HTMLTreeBuilder):
is_xml = False
- features = [HTML, STRICT, HTMLPARSER]
+ picklable = True
+ NAME = HTMLPARSER
+ features = [NAME, HTML, STRICT]
def __init__(self, *args, **kwargs):
- if CONSTRUCTOR_TAKES_STRICT:
+ if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
kwargs['strict'] = False
+ if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
+ kwargs['convert_charrefs'] = False
self.parser_args = (args, kwargs)
def prepare_markup(self, markup, user_specified_encoding=None,
- document_declared_encoding=None):
+ document_declared_encoding=None, exclude_encodings=None):
"""
:return: A 4-tuple (markup, original encoding, encoding
declared within markup, whether any characters had to be
replaced with REPLACEMENT CHARACTER).
"""
- if isinstance(markup, unicode):
+ if isinstance(markup, str):
yield (markup, None, None, False)
return
try_encodings = [user_specified_encoding, document_declared_encoding]
- dammit = UnicodeDammit(markup, try_encodings, is_html=True)
+ dammit = UnicodeDammit(markup, try_encodings, is_html=True,
+ exclude_encodings=exclude_encodings)
yield (dammit.markup, dammit.original_encoding,
dammit.declared_html_encoding,
dammit.contains_replacement_characters)
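prepare_markup now forwards exclude_encodings into UnicodeDammit, which tries the candidate encodings in order and records which one actually decoded the bytes. A minimal sketch of that helper on its own (sample data and encodings chosen for illustration):

    from bs4.dammit import UnicodeDammit

    data = "Sacr\u00e9 bleu!".encode("utf-8")
    dammit = UnicodeDammit(data, ["utf-8", "latin-1"], is_html=True)

    print(dammit.unicode_markup)     # 'Sacré bleu!'
    print(dammit.original_encoding)  # 'utf-8'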
@@ -158,7 +162,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup
try:
parser.feed(markup)
- except HTMLParseError, e:
+ except HTMLParseError as e:
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
raise e
diff --git a/import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py b/import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py
index fa5d49875..9c6c14ee6 100644
--- a/import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py
+++ b/import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py
@@ -4,10 +4,15 @@ __all__ = [
]
from io import BytesIO
-from StringIO import StringIO
+from io import StringIO
import collections
from lxml import etree
-from bs4.element import Comment, Doctype, NamespacedAttribute
+from bs4.element import (
+ Comment,
+ Doctype,
+ NamespacedAttribute,
+ ProcessingInstruction,
+)
from bs4.builder import (
FAST,
HTML,
@@ -25,8 +30,11 @@ class LXMLTreeBuilderForXML(TreeBuilder):
is_xml = True
+ NAME = "lxml-xml"
+ ALTERNATE_NAMES = ["xml"]
+
# Well, it's permissive by XML parser standards.
- features = [LXML, XML, FAST, PERMISSIVE]
+ features = [NAME, LXML, XML, FAST, PERMISSIVE]
CHUNK_SIZE = 512
@@ -70,6 +78,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
return (None, tag)
def prepare_markup(self, markup, user_specified_encoding=None,
+ exclude_encodings=None,
document_declared_encoding=None):
"""
:yield: A series of 4-tuples.
@@ -78,12 +87,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
Each 4-tuple represents a strategy for parsing the document.
"""
- if isinstance(markup, unicode):
+ if isinstance(markup, str):
# We were given Unicode. Maybe lxml can parse Unicode on
# this system?
yield markup, None, document_declared_encoding, False
- if isinstance(markup, unicode):
+ if isinstance(markup, str):
# No, apparently not. Convert the Unicode to UTF-8 and
# tell lxml to parse it as UTF-8.
yield (markup.encode("utf8"), "utf8",
@@ -95,14 +104,15 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# the document as each one in turn.
is_html = not self.is_xml
try_encodings = [user_specified_encoding, document_declared_encoding]
- detector = EncodingDetector(markup, try_encodings, is_html)
+ detector = EncodingDetector(
+ markup, try_encodings, is_html, exclude_encodings)
for encoding in detector.encodings:
yield (detector.markup, encoding, document_declared_encoding, False)
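For lxml the detection step is delegated to EncodingDetector, called positionally as (markup, try_encodings, is_html, exclude_encodings); each candidate it yields becomes one parsing attempt in the loop above. A rough usage sketch (sample document and encodings are illustrative):

    from bs4.dammit import EncodingDetector

    data = b'<html><head><meta charset="shift_jis"></head><body>...</body></html>'
    detector = EncodingDetector(data, ["utf-8"], True, ["iso-8859-1"])

    for encoding in detector.encodings:
        # Caller-supplied and document-declared encodings come first,
        # minus anything excluded; the builder tries each in turn.
        print(encoding)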
def feed(self, markup):
if isinstance(markup, bytes):
markup = BytesIO(markup)
- elif isinstance(markup, unicode):
+ elif isinstance(markup, str):
markup = StringIO(markup)
# Call feed() at least once, even if the markup is empty,
@@ -117,7 +127,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
if len(data) != 0:
self.parser.feed(data)
self.parser.close()
- except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(str(e))
def close(self):
@@ -135,12 +145,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.nsmaps.append(None)
elif len(nsmap) > 0:
# A new namespace mapping has come into play.
- inverted_nsmap = dict((value, key) for key, value in nsmap.items())
+ inverted_nsmap = dict((value, key) for key, value in list(nsmap.items()))
self.nsmaps.append(inverted_nsmap)
# Also treat the namespace mapping as a set of attributes on the
# tag, so we can recreate it later.
attrs = attrs.copy()
- for prefix, namespace in nsmap.items():
+ for prefix, namespace in list(nsmap.items()):
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
attrs[attribute] = namespace
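The namespace handling itself is unchanged apart from materializing the dict views. lxml hands over a prefix-to-URI map; the builder keeps the inverse so a URI can be turned back into a prefix later, and re-exposes the map as xmlns attributes. In isolation (plain strings here stand in for NamespacedAttribute):

    nsmap = {"xsl": "http://www.w3.org/1999/XSL/Transform"}

    # URI -> prefix, so namespaced tag names can be rebuilt later.
    inverted_nsmap = {uri: prefix for prefix, uri in nsmap.items()}

    # The same mapping, re-exposed as xmlns:* attributes on the tag.
    attrs = {"xmlns:%s" % prefix: uri for prefix, uri in nsmap.items()}
    print(inverted_nsmap, attrs)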
@@ -149,7 +159,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# from lxml with namespaces attached to their names, and
# turn then into NamespacedAttribute objects.
new_attrs = {}
- for attr, value in attrs.items():
+ for attr, value in list(attrs.items()):
namespace, attr = self._getNsTag(attr)
if namespace is None:
new_attrs[attr] = value
@@ -189,7 +199,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.nsmaps.pop()
def pi(self, target, data):
- pass
+ self.soup.endData()
+ self.soup.handle_data(target + ' ' + data)
+ self.soup.endData(ProcessingInstruction)
def data(self, content):
self.soup.handle_data(content)
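Previously the XML builder dropped processing instructions on the floor; pi() now stores them as ProcessingInstruction strings (target + ' ' + data). With lxml installed and this change applied, roughly:

    from bs4 import BeautifulSoup
    from bs4.element import ProcessingInstruction

    soup = BeautifulSoup('<?xml-stylesheet type="text/xsl" href="s.xsl"?><root/>', "xml")
    pi = soup.contents[0]
    print(type(pi) is ProcessingInstruction)  # expected: True
    print(pi)  # expected: xml-stylesheet type="text/xsl" href="s.xsl"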
@@ -207,12 +219,15 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
- return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
+ return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
- features = [LXML, HTML, FAST, PERMISSIVE]
+ NAME = LXML
+ ALTERNATE_NAMES = ["lxml-html"]
+
+ features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
is_xml = False
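With the NAME/ALTERNATE_NAMES split, "lxml" (or "lxml-html") selects the lenient HTML builder while "lxml-xml" (or plain "xml") selects the XML one, so the two can no longer be confused. Assuming lxml is installed:

    from bs4 import BeautifulSoup

    html_soup = BeautifulSoup("<p>one<p>two", "lxml")           # forgiving HTML parse
    xml_soup = BeautifulSoup("<root><p>one</p></root>", "xml")  # real XML parse

    print(type(html_soup.builder).__name__)  # LXMLTreeBuilder
    print(type(xml_soup.builder).__name__)   # LXMLTreeBuilderForXML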
def default_parser(self, encoding):
@@ -224,10 +239,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
self.parser = self.parser_for(encoding)
self.parser.feed(markup)
self.parser.close()
- except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(str(e))
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
- return u'<html><body>%s</body></html>' % fragment
+ return '<html><body>%s</body></html>' % fragment