summaryrefslogtreecommitdiffstats
path: root/import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py
diff options
context:
space:
mode:
Diffstat (limited to 'import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py')
-rw-r--r--import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py47
1 files changed, 31 insertions, 16 deletions
diff --git a/import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py b/import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py
index fa5d49875..9c6c14ee6 100644
--- a/import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py
+++ b/import-layers/yocto-poky/bitbake/lib/bs4/builder/_lxml.py
@@ -4,10 +4,15 @@ __all__ = [
]
from io import BytesIO
-from StringIO import StringIO
+from io import StringIO
import collections
from lxml import etree
-from bs4.element import Comment, Doctype, NamespacedAttribute
+from bs4.element import (
+ Comment,
+ Doctype,
+ NamespacedAttribute,
+ ProcessingInstruction,
+)
from bs4.builder import (
FAST,
HTML,
@@ -25,8 +30,11 @@ class LXMLTreeBuilderForXML(TreeBuilder):
is_xml = True
+ NAME = "lxml-xml"
+ ALTERNATE_NAMES = ["xml"]
+
# Well, it's permissive by XML parser standards.
- features = [LXML, XML, FAST, PERMISSIVE]
+ features = [NAME, LXML, XML, FAST, PERMISSIVE]
CHUNK_SIZE = 512
@@ -70,6 +78,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
return (None, tag)
def prepare_markup(self, markup, user_specified_encoding=None,
+ exclude_encodings=None,
document_declared_encoding=None):
"""
:yield: A series of 4-tuples.
@@ -78,12 +87,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
Each 4-tuple represents a strategy for parsing the document.
"""
- if isinstance(markup, unicode):
+ if isinstance(markup, str):
# We were given Unicode. Maybe lxml can parse Unicode on
# this system?
yield markup, None, document_declared_encoding, False
- if isinstance(markup, unicode):
+ if isinstance(markup, str):
# No, apparently not. Convert the Unicode to UTF-8 and
# tell lxml to parse it as UTF-8.
yield (markup.encode("utf8"), "utf8",
@@ -95,14 +104,15 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# the document as each one in turn.
is_html = not self.is_xml
try_encodings = [user_specified_encoding, document_declared_encoding]
- detector = EncodingDetector(markup, try_encodings, is_html)
+ detector = EncodingDetector(
+ markup, try_encodings, is_html, exclude_encodings)
for encoding in detector.encodings:
yield (detector.markup, encoding, document_declared_encoding, False)
def feed(self, markup):
if isinstance(markup, bytes):
markup = BytesIO(markup)
- elif isinstance(markup, unicode):
+ elif isinstance(markup, str):
markup = StringIO(markup)
# Call feed() at least once, even if the markup is empty,
@@ -117,7 +127,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
if len(data) != 0:
self.parser.feed(data)
self.parser.close()
- except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(str(e))
def close(self):
@@ -135,12 +145,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.nsmaps.append(None)
elif len(nsmap) > 0:
# A new namespace mapping has come into play.
- inverted_nsmap = dict((value, key) for key, value in nsmap.items())
+ inverted_nsmap = dict((value, key) for key, value in list(nsmap.items()))
self.nsmaps.append(inverted_nsmap)
# Also treat the namespace mapping as a set of attributes on the
# tag, so we can recreate it later.
attrs = attrs.copy()
- for prefix, namespace in nsmap.items():
+ for prefix, namespace in list(nsmap.items()):
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
attrs[attribute] = namespace
@@ -149,7 +159,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
# from lxml with namespaces attached to their names, and
# turn then into NamespacedAttribute objects.
new_attrs = {}
- for attr, value in attrs.items():
+ for attr, value in list(attrs.items()):
namespace, attr = self._getNsTag(attr)
if namespace is None:
new_attrs[attr] = value
@@ -189,7 +199,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
self.nsmaps.pop()
def pi(self, target, data):
- pass
+ self.soup.endData()
+ self.soup.handle_data(target + ' ' + data)
+ self.soup.endData(ProcessingInstruction)
def data(self, content):
self.soup.handle_data(content)
@@ -207,12 +219,15 @@ class LXMLTreeBuilderForXML(TreeBuilder):
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
- return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
+ return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
- features = [LXML, HTML, FAST, PERMISSIVE]
+ NAME = LXML
+ ALTERNATE_NAMES = ["lxml-html"]
+
+ features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
is_xml = False
def default_parser(self, encoding):
@@ -224,10 +239,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
self.parser = self.parser_for(encoding)
self.parser.feed(markup)
self.parser.close()
- except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+ except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
raise ParserRejectedMarkup(str(e))
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
- return u'<html><body>%s</body></html>' % fragment
+ return '<html><body>%s</body></html>' % fragment
OpenPOWER on IntegriCloud