summaryrefslogtreecommitdiffstats
path: root/import-layers/yocto-poky/bitbake/lib/bs4/builder/_htmlparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'import-layers/yocto-poky/bitbake/lib/bs4/builder/_htmlparser.py')
-rw-r--r--import-layers/yocto-poky/bitbake/lib/bs4/builder/_htmlparser.py56
1 files changed, 30 insertions, 26 deletions
diff --git a/import-layers/yocto-poky/bitbake/lib/bs4/builder/_htmlparser.py b/import-layers/yocto-poky/bitbake/lib/bs4/builder/_htmlparser.py
index ca8d8b892..bb0a63f2f 100644
--- a/import-layers/yocto-poky/bitbake/lib/bs4/builder/_htmlparser.py
+++ b/import-layers/yocto-poky/bitbake/lib/bs4/builder/_htmlparser.py
@@ -4,10 +4,16 @@ __all__ = [
'HTMLParserTreeBuilder',
]
-from HTMLParser import (
- HTMLParser,
- HTMLParseError,
- )
+from html.parser import HTMLParser
+
+try:
+ from html.parser import HTMLParseError
+except ImportError as e:
+ # HTMLParseError is removed in Python 3.5. Since it can never be
+ # thrown in 3.5, we can just define our own class as a placeholder.
+ class HTMLParseError(Exception):
+ pass
+
import sys
import warnings
@@ -19,10 +25,10 @@ import warnings
# At the end of this file, we monkeypatch HTMLParser so that
# strict=True works well on Python 3.2.2.
major, minor, release = sys.version_info[:3]
-CONSTRUCTOR_TAKES_STRICT = (
- major > 3
- or (major == 3 and minor > 2)
- or (major == 3 and minor == 2 and release >= 3))
+CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
+CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
+CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
+
from bs4.element import (
CData,
@@ -63,7 +69,8 @@ class BeautifulSoupHTMLParser(HTMLParser):
def handle_charref(self, name):
# XXX workaround for a bug in HTMLParser. Remove this once
- # it's fixed.
+ # it's fixed in all supported versions.
+ # http://bugs.python.org/issue13633
if name.startswith('x'):
real_name = int(name.lstrip('x'), 16)
elif name.startswith('X'):
@@ -72,9 +79,9 @@ class BeautifulSoupHTMLParser(HTMLParser):
real_name = int(name)
try:
- data = unichr(real_name)
- except (ValueError, OverflowError), e:
- data = u"\N{REPLACEMENT CHARACTER}"
+ data = chr(real_name)
+ except (ValueError, OverflowError) as e:
+ data = "\N{REPLACEMENT CHARACTER}"
self.handle_data(data)
@@ -113,14 +120,6 @@ class BeautifulSoupHTMLParser(HTMLParser):
def handle_pi(self, data):
self.soup.endData()
- if data.endswith("?") and data.lower().startswith("xml"):
- # "An XHTML processing instruction using the trailing '?'
- # will cause the '?' to be included in data." - HTMLParser
- # docs.
- #
- # Strip the question mark so we don't end up with two
- # question marks.
- data = data[:-1]
self.soup.handle_data(data)
self.soup.endData(ProcessingInstruction)
@@ -128,26 +127,31 @@ class BeautifulSoupHTMLParser(HTMLParser):
class HTMLParserTreeBuilder(HTMLTreeBuilder):
is_xml = False
- features = [HTML, STRICT, HTMLPARSER]
+ picklable = True
+ NAME = HTMLPARSER
+ features = [NAME, HTML, STRICT]
def __init__(self, *args, **kwargs):
- if CONSTRUCTOR_TAKES_STRICT:
+ if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
kwargs['strict'] = False
+ if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
+ kwargs['convert_charrefs'] = False
self.parser_args = (args, kwargs)
def prepare_markup(self, markup, user_specified_encoding=None,
- document_declared_encoding=None):
+ document_declared_encoding=None, exclude_encodings=None):
"""
:return: A 4-tuple (markup, original encoding, encoding
declared within markup, whether any characters had to be
replaced with REPLACEMENT CHARACTER).
"""
- if isinstance(markup, unicode):
+ if isinstance(markup, str):
yield (markup, None, None, False)
return
try_encodings = [user_specified_encoding, document_declared_encoding]
- dammit = UnicodeDammit(markup, try_encodings, is_html=True)
+ dammit = UnicodeDammit(markup, try_encodings, is_html=True,
+ exclude_encodings=exclude_encodings)
yield (dammit.markup, dammit.original_encoding,
dammit.declared_html_encoding,
dammit.contains_replacement_characters)
@@ -158,7 +162,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
parser.soup = self.soup
try:
parser.feed(markup)
- except HTMLParseError, e:
+ except HTMLParseError as e:
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
raise e
OpenPOWER on IntegriCloud