Bug 1467516 [wpt PR 11380] - Update html5lib and six, a=testonly

Automatic update from web-platform-tests

Fix #7200: Update vendored html5lib to 1.0.1

This adds webencodings as another vendored package, and moves both to
third_party (part of #10922).

--
Fix #10922: move six into third_party and update to 1.11

--

wpt-commits: efdb898172298b29a50c2e39cd40ac191ee8b383, 7cd0b885a529734ef71afd3254df48f57f255512
wpt-pr: 11380

--HG--
rename : testing/web-platform/tests/tools/html5lib/.gitmodules => testing/web-platform/tests/tools/third_party/html5lib/.gitmodules
rename : testing/web-platform/tests/tools/html5lib/CONTRIBUTING.rst => testing/web-platform/tests/tools/third_party/html5lib/CONTRIBUTING.rst
rename : testing/web-platform/tests/tools/html5lib/LICENSE => testing/web-platform/tests/tools/third_party/html5lib/LICENSE
rename : testing/web-platform/tests/tools/html5lib/doc/Makefile => testing/web-platform/tests/tools/third_party/html5lib/doc/Makefile
rename : testing/web-platform/tests/tools/html5lib/doc/changes.rst => testing/web-platform/tests/tools/third_party/html5lib/doc/changes.rst
rename : testing/web-platform/tests/tools/html5lib/doc/license.rst => testing/web-platform/tests/tools/third_party/html5lib/doc/license.rst
rename : testing/web-platform/tests/tools/html5lib/doc/make.bat => testing/web-platform/tests/tools/third_party/html5lib/doc/make.bat
rename : testing/web-platform/tests/tools/html5lib/doc/modules.rst => testing/web-platform/tests/tools/third_party/html5lib/doc/modules.rst
rename : testing/web-platform/tests/tools/html5lib/html5lib/trie/datrie.py => testing/web-platform/tests/tools/third_party/html5lib/html5lib/_trie/datrie.py
rename : testing/web-platform/tests/tools/html5lib/html5lib/trie/py.py => testing/web-platform/tests/tools/third_party/html5lib/html5lib/_trie/py.py
rename : testing/web-platform/tests/tools/html5lib/html5lib/filters/_base.py => testing/web-platform/tests/tools/third_party/html5lib/html5lib/filters/base.py
rename : testing/web-platform/tests/tools/html5lib/html5lib/tests/__init__.py => testing/web-platform/tests/tools/third_party/html5lib/html5lib/tests/__init__.py
rename : testing/web-platform/tests/tools/html5lib/html5lib/tests/tokenizertotree.py => testing/web-platform/tests/tools/third_party/html5lib/html5lib/tests/tokenizertotree.py
rename : testing/web-platform/tests/tools/html5lib/html5lib/tests/us-ascii.html => testing/web-platform/tests/tools/third_party/html5lib/html5lib/tests/us-ascii.html
rename : testing/web-platform/tests/tools/html5lib/html5lib/tests/utf-8-bom.html => testing/web-platform/tests/tools/third_party/html5lib/html5lib/tests/utf-8-bom.html
rename : testing/web-platform/tests/tools/six/MANIFEST.in => testing/web-platform/tests/tools/third_party/six/MANIFEST.in
rename : testing/web-platform/tests/tools/six/documentation/Makefile => testing/web-platform/tests/tools/third_party/six/documentation/Makefile
rename : testing/web-platform/tests/tools/html5lib/requirements.txt => testing/web-platform/tests/tools/third_party/six/six.egg-info/top_level.txt
This commit is contained in:
Geoffrey Sneddon 2018-06-26 02:42:25 +00:00
Parent afbb50e29c
Commit 02df1786d1
176 changed files: 14187 additions and 4487 deletions


@@ -405432,7 +405432,7 @@
    "support"
   ],
   "./.gitmodules": [
-   "6a203e28d43909d7513daf8761281b351d2b2bd7",
+   "9e008399bdce736c7c03f7db0c3e8d624083c6b9",
    "support"
   ],
   "./.pyup.yml": [

testing/web-platform/tests/.gitmodules (vendored)

@@ -1,6 +1,3 @@
-[submodule "tools/html5lib/html5lib/tests/testdata"]
-	path = tools/html5lib/html5lib/tests/testdata
-	url = https://github.com/html5lib/html5lib-tests.git
 [submodule "resources/webidl2/test/widlproc"]
 	path = resources/webidl2/test/widlproc
 	url = https://github.com/dontcallmedom/widlproc.git


@@ -1,20 +0,0 @@
# Because we never want compiled Python
__pycache__/
*.pyc
# Ignore stuff produced by distutils
/build/
/dist/
/MANIFEST
# Generated by parse.py -p
stats.prof
# From cover (esp. in combination with nose)
.coverage
# Because tox's data is inherently local
/.tox/
# We have no interest in built Sphinx files
/doc/_build


@@ -1,37 +0,0 @@
language: python
python:
- "2.6"
- "2.7"
- "3.2"
- "3.3"
- "3.4"
- "pypy"
env:
- USE_OPTIONAL=true
- USE_OPTIONAL=false
matrix:
exclude:
- python: "2.7"
env: USE_OPTIONAL=false
- python: "3.4"
env: USE_OPTIONAL=false
include:
- python: "2.7"
env: USE_OPTIONAL=false FLAKE=true
- python: "3.4"
env: USE_OPTIONAL=false FLAKE=true
before_install:
- git submodule update --init --recursive
install:
- bash requirements-install.sh
script:
- nosetests
- bash flake8-run.sh
after_script:
- python debug-info.py


@@ -1,171 +0,0 @@
Change Log
----------
0.9999
~~~~~~
Released on XXX, 2014
* XXX
0.999
~~~~~
Released on December 23, 2013
* Fix #127: add work-around for CPython issue #20007: .read(0) on
http.client.HTTPResponse drops the rest of the content.
* Fix #115: lxml treewalker can now deal with fragments containing, at
their root level, text nodes with non-ASCII characters on Python 2.
0.99
~~~~
Released on September 10, 2013
* No library changes from 1.0b3; released as 0.99 as pip has changed
behaviour from 1.4 to avoid installing pre-release versions per
PEP 440.
1.0b3
~~~~~
Released on July 24, 2013
* Removed ``RecursiveTreeWalker`` from ``treewalkers._base``. Any
implementation using it should be moved to
``NonRecursiveTreeWalker``, as everything bundled with html5lib has
for years.
* Fix #67 so that ``BufferedStream`` correctly returns a bytes
  object, thereby fixing any case where html5lib is passed a
  non-seekable RawIOBase-like object.
1.0b2
~~~~~
Released on June 27, 2013
* Removed reordering of attributes within the serializer. There is now
an ``alphabetical_attributes`` option which preserves the previous
behaviour through a new filter. This allows attribute order to be
preserved through html5lib if the tree builder preserves order.
* Removed ``dom2sax`` from DOM treebuilders. It has been replaced by
``treeadapters.sax.to_sax`` which is generic and supports any
treewalker; it also resolves all known bugs with ``dom2sax``.
* Fix treewalker assertions on hitting bytes strings on
Python 2. Previous to 1.0b1, treewalkers coped with mixed
bytes/unicode data on Python 2; this reintroduces this prior
behaviour on Python 2. Behaviour is unchanged on Python 3.
1.0b1
~~~~~
Released on May 17, 2013
* Implementation updated to implement the `HTML specification
<http://www.whatwg.org/specs/web-apps/current-work/>`_ as of 5th May
2013 (`SVN <http://svn.whatwg.org/webapps/>`_ revision r7867).
* Python 3.2+ supported in a single codebase using the ``six`` library.
* Removed support for Python 2.5 and older.
* Removed the deprecated Beautiful Soup 3 treebuilder.
``beautifulsoup4`` can use ``html5lib`` as a parser instead. Note that
since it doesn't support namespaces, foreign content like SVG and
MathML is parsed incorrectly.
* Removed ``simpletree`` from the package. The default tree builder is
now ``etree`` (using the ``xml.etree.cElementTree`` implementation if
available, and ``xml.etree.ElementTree`` otherwise).
* Removed the ``XHTMLSerializer`` as it never actually guaranteed its
output was well-formed XML, and hence provided little of use.
* Removed default DOM treebuilder, so ``html5lib.treebuilders.dom`` is no
longer supported. ``html5lib.treebuilders.getTreeBuilder("dom")`` will
return the default DOM treebuilder, which uses ``xml.dom.minidom``.
* Optional heuristic character encoding detection now based on
``charade`` for Python 2.6 - 3.3 compatibility.
* Optional ``Genshi`` treewalker support fixed.
* Many bugfixes, including:
* #33: null in attribute value breaks XML AttValue;
* #4: nested, indirect descendant, <button> causes infinite loop;
* `Google Code 215
<http://code.google.com/p/html5lib/issues/detail?id=215>`_: Properly
detect seekable streams;
* `Google Code 206
<http://code.google.com/p/html5lib/issues/detail?id=206>`_: add
support for <video preload=...>, <audio preload=...>;
* `Google Code 205
<http://code.google.com/p/html5lib/issues/detail?id=205>`_: add
support for <video poster=...>;
* `Google Code 202
<http://code.google.com/p/html5lib/issues/detail?id=202>`_: Unicode
file breaks InputStream.
* Source code is now mostly PEP 8 compliant.
* Test harness has been improved and now depends on ``nose``.
* Documentation updated and moved to http://html5lib.readthedocs.org/.
0.95
~~~~
Released on February 11, 2012
0.90
~~~~
Released on January 17, 2010
0.11.1
~~~~~~
Released on June 12, 2008
0.11
~~~~
Released on June 10, 2008
0.10
~~~~
Released on October 7, 2007
0.9
~~~
Released on March 11, 2007
0.2
~~~
Released on January 8, 2007


@@ -1,77 +0,0 @@
html5lib Package
================
:mod:`html5lib` Package
-----------------------
.. automodule:: html5lib.__init__
:members:
:undoc-members:
:show-inheritance:
:mod:`constants` Module
-----------------------
.. automodule:: html5lib.constants
:members:
:undoc-members:
:show-inheritance:
:mod:`html5parser` Module
-------------------------
.. automodule:: html5lib.html5parser
:members:
:undoc-members:
:show-inheritance:
:mod:`ihatexml` Module
----------------------
.. automodule:: html5lib.ihatexml
:members:
:undoc-members:
:show-inheritance:
:mod:`inputstream` Module
-------------------------
.. automodule:: html5lib.inputstream
:members:
:undoc-members:
:show-inheritance:
:mod:`sanitizer` Module
-----------------------
.. automodule:: html5lib.sanitizer
:members:
:undoc-members:
:show-inheritance:
:mod:`tokenizer` Module
-----------------------
.. automodule:: html5lib.tokenizer
:members:
:undoc-members:
:show-inheritance:
:mod:`utils` Module
-------------------
.. automodule:: html5lib.utils
:members:
:undoc-members:
:show-inheritance:
Subpackages
-----------
.. toctree::
html5lib.filters
html5lib.serializer
html5lib.treebuilders
html5lib.treewalkers


@@ -1,19 +0,0 @@
serializer Package
==================
:mod:`serializer` Package
-------------------------
.. automodule:: html5lib.serializer
:members:
:undoc-members:
:show-inheritance:
:mod:`htmlserializer` Module
----------------------------
.. automodule:: html5lib.serializer.htmlserializer
:members:
:undoc-members:
:show-inheritance:


@@ -1,59 +0,0 @@
treewalkers Package
===================
:mod:`treewalkers` Package
--------------------------
.. automodule:: html5lib.treewalkers
:members:
:undoc-members:
:show-inheritance:
:mod:`_base` Module
-------------------
.. automodule:: html5lib.treewalkers._base
:members:
:undoc-members:
:show-inheritance:
:mod:`dom` Module
-----------------
.. automodule:: html5lib.treewalkers.dom
:members:
:undoc-members:
:show-inheritance:
:mod:`etree` Module
-------------------
.. automodule:: html5lib.treewalkers.etree
:members:
:undoc-members:
:show-inheritance:
:mod:`genshistream` Module
--------------------------
.. automodule:: html5lib.treewalkers.genshistream
:members:
:undoc-members:
:show-inheritance:
:mod:`lxmletree` Module
-----------------------
.. automodule:: html5lib.treewalkers.lxmletree
:members:
:undoc-members:
:show-inheritance:
:mod:`pulldom` Module
---------------------
.. automodule:: html5lib.treewalkers.pulldom
:members:
:undoc-members:
:show-inheritance:


@@ -1,14 +0,0 @@
#!/bin/bash -e
if [[ ! -x $(which flake8) ]]; then
echo "fatal: flake8 not found on $PATH. Exiting."
exit 1
fi
if [[ $TRAVIS != "true" || $FLAKE == "true" ]]; then
find html5lib/ -name '*.py' -and -not -name 'constants.py' -print0 | xargs -0 flake8 --ignore=E501
flake1=$?
flake8 --max-line-length=99 --ignore=E126 html5lib/constants.py
flake2=$?
exit $[$flake1 || $flake2]
fi


@@ -1,23 +0,0 @@
"""
HTML parsing library based on the WHATWG "HTML5"
specification. The parser is designed to be compatible with existing
HTML found in the wild and implements well-defined error recovery that
is largely compatible with modern desktop web browsers.
Example usage:
import html5lib
f = open("my_document.html")
tree = html5lib.parse(f)
"""
from __future__ import absolute_import, division, unicode_literals
from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
from .treewalkers import getTreeWalker
from .serializer import serialize
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
"getTreeWalker", "serialize"]
__version__ = "0.9999-dev"


@@ -1,20 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from . import _base
try:
from collections import OrderedDict
except ImportError:
from ordereddict import OrderedDict
class Filter(_base.Filter):
def __iter__(self):
for token in _base.Filter.__iter__(self):
if token["type"] in ("StartTag", "EmptyTag"):
attrs = OrderedDict()
for name, value in sorted(token["data"].items(),
key=lambda x: x[0]):
attrs[name] = value
token["data"] = attrs
yield token

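The deleted ``alphabeticalattributes`` filter above boils down to one stdlib step; a minimal sketch of the same attribute-sorting logic, assuming a token shaped like html5lib's StartTag dicts:

```python
from collections import OrderedDict

def sort_attributes(token):
    # Mirror the filter above: for StartTag/EmptyTag tokens, rebuild the
    # attribute mapping in sorted key order; other tokens pass through.
    if token["type"] in ("StartTag", "EmptyTag"):
        token["data"] = OrderedDict(sorted(token["data"].items(),
                                           key=lambda item: item[0]))
    return token

token = {"type": "StartTag", "name": "a", "data": {"href": "#", "class": "x"}}
print(list(sort_attributes(token)["data"]))  # ['class', 'href']
```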

@@ -1,93 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from gettext import gettext
_ = gettext
from . import _base
from ..constants import cdataElements, rcdataElements, voidElements
from ..constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)
class LintError(Exception):
pass
class Filter(_base.Filter):
def __iter__(self):
open_elements = []
contentModelFlag = "PCDATA"
for token in _base.Filter.__iter__(self):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
name = token["name"]
if contentModelFlag != "PCDATA":
raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name})
if not isinstance(name, str):
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
if not name:
raise LintError(_("Empty tag name"))
if type == "StartTag" and name in voidElements:
raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name})
elif type == "EmptyTag" and name not in voidElements:
raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]})
if type == "StartTag":
open_elements.append(name)
for name, value in token["data"]:
if not isinstance(name, str):
raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name})
if not name:
raise LintError(_("Empty attribute name"))
if not isinstance(value, str):
raise LintError(_("Attribute value is not a string: %(value)r") % {"value": value})
if name in cdataElements:
contentModelFlag = "CDATA"
elif name in rcdataElements:
contentModelFlag = "RCDATA"
elif name == "plaintext":
contentModelFlag = "PLAINTEXT"
elif type == "EndTag":
name = token["name"]
if not isinstance(name, str):
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
if not name:
raise LintError(_("Empty tag name"))
if name in voidElements:
raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name})
start_name = open_elements.pop()
if start_name != name:
raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name})
contentModelFlag = "PCDATA"
elif type == "Comment":
if contentModelFlag != "PCDATA":
raise LintError(_("Comment not in PCDATA content model flag"))
elif type in ("Characters", "SpaceCharacters"):
data = token["data"]
if not isinstance(data, str):
raise LintError(_("Character data is not a string: %(name)r") % {"name": data})
if not data:
raise LintError(_("%(type)s token with empty data") % {"type": type})
if type == "SpaceCharacters":
data = data.strip(spaceCharacters)
if data:
raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data})
elif type == "Doctype":
name = token["name"]
if contentModelFlag != "PCDATA":
raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name})
if not isinstance(name, str):
raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name})
# XXX: what to do with token["data"] ?
elif type in ("ParseError", "SerializeError"):
pass
else:
raise LintError(_("Unknown token type: %(type)s") % {"type": type})
yield token


@@ -1,12 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from . import _base
from ..sanitizer import HTMLSanitizerMixin
class Filter(_base.Filter, HTMLSanitizerMixin):
def __iter__(self):
for token in _base.Filter.__iter__(self):
token = self.sanitize_token(token)
if token:
yield token


@@ -1,271 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import re
from xml.sax.saxutils import escape, unescape
from .tokenizer import HTMLTokenizer
from .constants import tokenTypes
class HTMLSanitizerMixin(object):
""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
'munderover', 'none']
svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
'background', 'balance', 'bgcolor', 'bgproperties', 'border',
'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
'optimum', 'pattern', 'ping', 'point-size', 'poster', 'pqg', 'preload',
'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
'width', 'wrap', 'xml:lang']
mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
'xlink:type', 'xmlns', 'xmlns:xlink']
svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
'arabic-form', 'ascent', 'attributeName', 'attributeType',
'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
'fill-opacity', 'fill-rule', 'font-family', 'font-size',
'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
'opacity', 'orient', 'origin', 'overline-position',
'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
'transform', 'type', 'u1', 'u2', 'underline-position',
'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
'y1', 'y2', 'zoomAndPan']
attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster',
'xlink:href', 'xml:base']
svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
'mask', 'stroke']
svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
'set', 'use']
acceptable_css_properties = ['azimuth', 'background-color',
'border-bottom-color', 'border-collapse', 'border-color',
'border-left-color', 'border-right-color', 'border-top-color', 'clear',
'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
'white-space', 'width']
acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
'transparent', 'underline', 'white', 'yellow']
acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule',
'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
'stroke-opacity']
acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc',
'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
'ssh', 'sftp', 'rtsp', 'afs']
# subclasses may define their own versions of these constants
allowed_elements = acceptable_elements + mathml_elements + svg_elements
allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
allowed_css_properties = acceptable_css_properties
allowed_css_keywords = acceptable_css_keywords
allowed_svg_properties = acceptable_svg_properties
allowed_protocols = acceptable_protocols
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
# stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
# attributes are parsed, and a restricted set, # specified by
# ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
# attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
# in ALLOWED_PROTOCOLS are allowed.
#
# sanitize_html('<script> do_nasty_stuff() </script>')
# => &lt;script> do_nasty_stuff() &lt;/script>
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
# => <a>Click here for $100</a>
def sanitize_token(self, token):
# accommodate filters which use token_type differently
token_type = token["type"]
if token_type in list(tokenTypes.keys()):
token_type = tokenTypes[token_type]
if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]):
if token["name"] in self.allowed_elements:
return self.allowed_token(token, token_type)
else:
return self.disallowed_token(token, token_type)
elif token_type == tokenTypes["Comment"]:
pass
else:
return token
def allowed_token(self, token, token_type):
if "data" in token:
attrs = dict([(name, val) for name, val in
token["data"][::-1]
if name in self.allowed_attributes])
for attr in self.attr_val_is_uri:
if attr not in attrs:
continue
val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
unescape(attrs[attr])).lower()
# remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace("\ufffd", "")
if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and
(val_unescaped.split(':')[0] not in
self.allowed_protocols)):
del attrs[attr]
for attr in self.svg_attr_val_allows_ref:
if attr in attrs:
attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
' ',
unescape(attrs[attr]))
if (token["name"] in self.svg_allow_local_href and
'xlink:href' in attrs and re.search('^\s*[^#\s].*',
attrs['xlink:href'])):
del attrs['xlink:href']
if 'style' in attrs:
attrs['style'] = self.sanitize_css(attrs['style'])
token["data"] = [[name, val] for name, val in list(attrs.items())]
return token
def disallowed_token(self, token, token_type):
if token_type == tokenTypes["EndTag"]:
token["data"] = "</%s>" % token["name"]
elif token["data"]:
attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]])
token["data"] = "<%s%s>" % (token["name"], attrs)
else:
token["data"] = "<%s>" % token["name"]
if token.get("selfClosing"):
token["data"] = token["data"][:-1] + "/>"
if token["type"] in list(tokenTypes.keys()):
token["type"] = "Characters"
else:
token["type"] = tokenTypes["Characters"]
del token["name"]
return token
def sanitize_css(self, style):
# disallow urls
style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
# gauntlet
if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
return ''
if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
return ''
clean = []
for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style):
if not value:
continue
if prop.lower() in self.allowed_css_properties:
clean.append(prop + ': ' + value + ';')
elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
'padding']:
for keyword in value.split():
if keyword not in self.acceptable_css_keywords and \
not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):
break
else:
clean.append(prop + ': ' + value + ';')
elif prop.lower() in self.allowed_svg_properties:
clean.append(prop + ': ' + value + ';')
return ' '.join(clean)
class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
lowercaseElementName=False, lowercaseAttrName=False, parser=None):
# Change case matching defaults as we only output lowercase html anyway
# This solution doesn't seem ideal...
HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
lowercaseElementName, lowercaseAttrName, parser=parser)
def __iter__(self):
for token in HTMLTokenizer.__iter__(self):
token = self.sanitize_token(token)
if token:
yield token

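The URI-scheme check in ``allowed_token`` above can be isolated into a small stdlib sketch; the whitelist here is a trimmed stand-in for the full ``allowed_protocols`` list:

```python
import re
from xml.sax.saxutils import unescape

ALLOWED_PROTOCOLS = {"http", "https", "mailto"}  # trimmed list for the sketch

def is_allowed_uri(value):
    # Mirror allowed_token above: strip control and space characters that
    # could hide a scheme, drop replacement characters, then compare the
    # scheme (if any) against the whitelist. Schemeless values pass.
    unescaped = re.sub(r"[`\000-\040\177-\240\s]+", "", unescape(value)).lower()
    unescaped = unescaped.replace("\ufffd", "")
    if (re.match(r"^[a-z0-9][-+.a-z0-9]*:", unescaped) and
            unescaped.split(":")[0] not in ALLOWED_PROTOCOLS):
        return False
    return True

print(is_allowed_uri("http://example.com/"))    # True
print(is_allowed_uri("java\tscript:alert(1)"))  # False
```

Note the embedded tab in the second example: normalizing whitespace before matching the scheme is what defeats that obfuscation.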

@@ -1,16 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from .. import treewalkers
from .htmlserializer import HTMLSerializer
def serialize(input, tree="etree", format="html", encoding=None,
**serializer_opts):
# XXX: Should we cache this?
walker = treewalkers.getTreeWalker(tree)
if format == "html":
s = HTMLSerializer(**serializer_opts)
else:
raise ValueError("type must be html")
return s.render(walker(input), encoding)


@@ -1,320 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
import gettext
_ = gettext.gettext
try:
from functools import reduce
except ImportError:
pass
from ..constants import voidElements, booleanAttributes, spaceCharacters
from ..constants import rcdataElements, entities, xmlEntities
from .. import utils
from xml.sax.saxutils import escape
spaceCharacters = "".join(spaceCharacters)
try:
from codecs import register_error, xmlcharrefreplace_errors
except ImportError:
unicode_encode_errors = "strict"
else:
unicode_encode_errors = "htmlentityreplace"
encode_entity_map = {}
is_ucs4 = len("\U0010FFFF") == 1
for k, v in list(entities.items()):
# skip multi-character entities
if ((is_ucs4 and len(v) > 1) or
(not is_ucs4 and len(v) > 2)):
continue
if v != "&":
if len(v) == 2:
v = utils.surrogatePairToCodepoint(v)
else:
v = ord(v)
if v not in encode_entity_map or k.islower():
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
encode_entity_map[v] = k
def htmlentityreplace_errors(exc):
if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
res = []
codepoints = []
skip = False
for i, c in enumerate(exc.object[exc.start:exc.end]):
if skip:
skip = False
continue
index = i + exc.start
if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2])
skip = True
else:
codepoint = ord(c)
codepoints.append(codepoint)
for cp in codepoints:
e = encode_entity_map.get(cp)
if e:
res.append("&")
res.append(e)
if not e.endswith(";"):
res.append(";")
else:
res.append("&#x%s;" % (hex(cp)[2:]))
return ("".join(res), exc.end)
else:
return xmlcharrefreplace_errors(exc)
register_error(unicode_encode_errors, htmlentityreplace_errors)
del register_error
class HTMLSerializer(object):
# attribute quoting options
quote_attr_values = False
quote_char = '"'
use_best_quote_char = True
# tag syntax options
omit_optional_tags = True
minimize_boolean_attributes = True
use_trailing_solidus = False
space_before_trailing_solidus = True
# escaping options
escape_lt_in_attrs = False
escape_rcdata = False
resolve_entities = True
# miscellaneous options
alphabetical_attributes = False
inject_meta_charset = True
strip_whitespace = False
sanitize = False
options = ("quote_attr_values", "quote_char", "use_best_quote_char",
"omit_optional_tags", "minimize_boolean_attributes",
"use_trailing_solidus", "space_before_trailing_solidus",
"escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
"alphabetical_attributes", "inject_meta_charset",
"strip_whitespace", "sanitize")
    def __init__(self, **kwargs):
        """Initialize HTMLSerializer.

        Keyword options (default given first unless specified) include:

        inject_meta_charset=True|False
          Whether or not to insert a meta element to define the character set
          of the document.
        quote_attr_values=True|False
          Whether to quote attribute values that don't require quoting
          per HTML5 parsing rules.
        quote_char=u'"'|u"'"
          Use the given quote character for attribute quoting. Defaults to
          double quote unless the attribute value contains a double quote,
          in which case single quotes are used instead.
        escape_lt_in_attrs=False|True
          Whether to escape < in attribute values.
        escape_rcdata=False|True
          Whether to escape characters that need to be escaped within normal
          elements within rcdata elements such as style.
        resolve_entities=True|False
          Whether to resolve named character entities that appear in the
          source tree. The XML predefined entities &lt; &gt; &amp; &quot; &apos;
          are unaffected by this setting.
        strip_whitespace=False|True
          Whether to remove semantically meaningless whitespace. (This
          compresses all whitespace to a single space except within pre.)
        minimize_boolean_attributes=True|False
          Shortens boolean attributes to give just the attribute value,
          for example <input disabled="disabled"> becomes <input disabled>.
        use_trailing_solidus=False|True
          Includes a close-tag slash at the end of the start tag of void
          elements (empty elements whose end tag is forbidden). E.g. <hr/>.
        space_before_trailing_solidus=True|False
          Places a space immediately before the closing slash in a tag
          using a trailing solidus. E.g. <hr />. Requires use_trailing_solidus.
        sanitize=False|True
          Strip all unsafe or unknown constructs from output.
          See `html5lib user documentation`_
        omit_optional_tags=True|False
          Omit start/end tags that are optional.
        alphabetical_attributes=False|True
          Reorder attributes to be in alphabetical order.

        .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
        """
        if 'quote_char' in kwargs:
            self.use_best_quote_char = False
        for attr in self.options:
            setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
        self.errors = []
        self.strict = False
    def encode(self, string):
        assert(isinstance(string, text_type))
        if self.encoding:
            return string.encode(self.encoding, unicode_encode_errors)
        else:
            return string

    def encodeStrict(self, string):
        assert(isinstance(string, text_type))
        if self.encoding:
            return string.encode(self.encoding, "strict")
        else:
            return string
    def serialize(self, treewalker, encoding=None):
        self.encoding = encoding
        in_cdata = False
        self.errors = []

        if encoding and self.inject_meta_charset:
            from ..filters.inject_meta_charset import Filter
            treewalker = Filter(treewalker, encoding)
        # WhitespaceFilter should be used before OptionalTagFilter
        # for maximum efficiency of the latter filter
        if self.strip_whitespace:
            from ..filters.whitespace import Filter
            treewalker = Filter(treewalker)
        if self.sanitize:
            from ..filters.sanitizer import Filter
            treewalker = Filter(treewalker)
        if self.omit_optional_tags:
            from ..filters.optionaltags import Filter
            treewalker = Filter(treewalker)
        # Alphabetical attributes must be last, as other filters
        # could add attributes and alter the order
        if self.alphabetical_attributes:
            from ..filters.alphabeticalattributes import Filter
            treewalker = Filter(treewalker)

        for token in treewalker:
            type = token["type"]
            if type == "Doctype":
                doctype = "<!DOCTYPE %s" % token["name"]

                if token["publicId"]:
                    doctype += ' PUBLIC "%s"' % token["publicId"]
                elif token["systemId"]:
                    doctype += " SYSTEM"
                if token["systemId"]:
                    if token["systemId"].find('"') >= 0:
                        if token["systemId"].find("'") >= 0:
                            self.serializeError(_("System identifier contains both single and double quote characters"))
                        quote_char = "'"
                    else:
                        quote_char = '"'
                    doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)

                doctype += ">"
                yield self.encodeStrict(doctype)

            elif type in ("Characters", "SpaceCharacters"):
                if type == "SpaceCharacters" or in_cdata:
                    if in_cdata and token["data"].find("</") >= 0:
                        self.serializeError(_("Unexpected </ in CDATA"))
                    yield self.encode(token["data"])
                else:
                    yield self.encode(escape(token["data"]))

            elif type in ("StartTag", "EmptyTag"):
                name = token["name"]
                yield self.encodeStrict("<%s" % name)
                if name in rcdataElements and not self.escape_rcdata:
                    in_cdata = True
                elif in_cdata:
                    self.serializeError(_("Unexpected child element of a CDATA element"))
                for (attr_namespace, attr_name), attr_value in token["data"].items():
                    # TODO: Add namespace support here
                    k = attr_name
                    v = attr_value
                    yield self.encodeStrict(' ')

                    yield self.encodeStrict(k)
                    if not self.minimize_boolean_attributes or \
                        (k not in booleanAttributes.get(name, tuple())
                         and k not in booleanAttributes.get("", tuple())):
                        yield self.encodeStrict("=")
                        if self.quote_attr_values or not v:
                            quote_attr = True
                        else:
                            quote_attr = reduce(lambda x, y: x or (y in v),
                                                spaceCharacters + ">\"'=", False)
                        v = v.replace("&", "&amp;")
                        if self.escape_lt_in_attrs:
                            v = v.replace("<", "&lt;")
                        if quote_attr:
                            quote_char = self.quote_char
                            if self.use_best_quote_char:
                                if "'" in v and '"' not in v:
                                    quote_char = '"'
                                elif '"' in v and "'" not in v:
                                    quote_char = "'"
                            if quote_char == "'":
                                v = v.replace("'", "&#39;")
                            else:
                                v = v.replace('"', "&quot;")
                            yield self.encodeStrict(quote_char)
                            yield self.encode(v)
                            yield self.encodeStrict(quote_char)
                        else:
                            yield self.encode(v)
                if name in voidElements and self.use_trailing_solidus:
                    if self.space_before_trailing_solidus:
                        yield self.encodeStrict(" /")
                    else:
                        yield self.encodeStrict("/")
                yield self.encode(">")

            elif type == "EndTag":
                name = token["name"]
                if name in rcdataElements:
                    in_cdata = False
                elif in_cdata:
                    self.serializeError(_("Unexpected child element of a CDATA element"))
                yield self.encodeStrict("</%s>" % name)

            elif type == "Comment":
                data = token["data"]
                if data.find("--") >= 0:
                    self.serializeError(_("Comment contains --"))
                yield self.encodeStrict("<!--%s-->" % token["data"])

            elif type == "Entity":
                name = token["name"]
                key = name + ";"
                if key not in entities:
                    self.serializeError(_("Entity %s not recognized" % name))
                if self.resolve_entities and key not in xmlEntities:
                    data = entities[key]
                else:
                    data = "&%s;" % name
                yield self.encodeStrict(data)

            else:
                self.serializeError(token["data"])

    def render(self, treewalker, encoding=None):
        if encoding:
            return b"".join(list(self.serialize(treewalker, encoding)))
        else:
            return "".join(list(self.serialize(treewalker)))

    def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
        # XXX The idea is to make data mandatory.
        self.errors.append(data)
        if self.strict:
            raise SerializeError
class SerializeError(Exception):
    """Error in serialized tree"""
    pass
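For illustration, the per-attribute quoting decision in the StartTag branch above can be sketched standalone. This is a simplified approximation, not the serializer itself: it hard-codes the whitespace set, keeps only the best-quote-char heuristic, and ignores boolean-attribute minimization and `escape_lt_in_attrs`.

```python
def needs_quoting(value):
    # Quote if the value is empty or contains whitespace, '>', '"',
    # "'" or '=' -- the same trigger set the serializer reduces over.
    if not value:
        return True
    return any(c in value for c in "\t\n\x0c\r >\"'=")

def serialize_attr(name, value):
    # Always escape ampersands first, as the serializer does.
    value = value.replace("&", "&amp;")
    if not needs_quoting(value):
        return "%s=%s" % (name, value)
    quote_char = '"'
    if '"' in value and "'" not in value:
        quote_char = "'"  # prefer the quote char the value doesn't use
    if quote_char == "'":
        value = value.replace("'", "&#39;")
    else:
        value = value.replace('"', "&quot;")
    return "%s=%s%s%s" % (name, quote_char, value, quote_char)

print(serialize_attr("id", "main"))      # -> id=main
print(serialize_attr("class", "a b"))    # -> class="a b"
```

The unquoted form is emitted only when it is unambiguous to reparse, which is why an empty value still forces quoting.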

Each testcase file can be run through nose (using ``nosetests``).

from __future__ import absolute_import, division, unicode_literals

import sys
import os

if __name__ == '__main__':
    # Allow us to import from the src directory
    os.chdir(os.path.split(os.path.abspath(__file__))[0])
    sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))

from html5lib.tokenizer import HTMLTokenizer


class HTMLParser(object):
    """Fake parser to test tokenizer output."""
    def parse(self, stream, output=True):
        tokenizer = HTMLTokenizer(stream)
        for token in tokenizer:
            if output:
                print(token)

if __name__ == "__main__":
    x = HTMLParser()
    if len(sys.argv) > 1:
        if len(sys.argv) > 2:
            import hotshot
            import hotshot.stats
            prof = hotshot.Profile('stats.prof')
            prof.runcall(x.parse, sys.argv[1], False)
            prof.close()
            stats = hotshot.stats.load('stats.prof')
            stats.strip_dirs()
            stats.sort_stats('time')
            stats.print_stats()
        else:
            x.parse(sys.argv[1])
    else:
        print("""Usage: python mockParser.py filename [stats]
If stats is specified the hotshot profiler will run and output the
stats instead.
""")
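The profiling branch above uses hotshot, which only exists on Python 2; an equivalent sketch with cProfile/pstats (profiling a stand-in callable, since the real parser needs a filename argument) would be:

```python
import cProfile
import pstats
import io

def parse(text):
    # stand-in for the tokenizer-driving x.parse above; any callable works
    return text.upper()

prof = cProfile.Profile()
prof.runcall(parse, "hello")          # same runcall API as hotshot.Profile

buf = io.StringIO()
stats = pstats.Stats(prof, stream=buf)
stats.strip_dirs()
stats.sort_stats("time")              # cProfile accepts "time" like hotshot did
stats.print_stats()
print("function calls" in buf.getvalue())
```

`runcall`, `strip_dirs`, `sort_stats` and `print_stats` all carry over unchanged, so porting the branch is mostly a matter of swapping the import and dropping the intermediate stats file.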

from __future__ import absolute_import, division, unicode_literals


def f1():
    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    x += y + z


def f2():
    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    x = x + y + z


def f3():
    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    x = "".join((x, y, z))


def f4():
    x = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    y = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    z = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ"
    x = "%s%s%s" % (x, y, z)

import timeit

for x in range(4):
    # the statement must call the function, not merely name it
    statement = "f%s()" % (x + 1)
    t = timeit.Timer(statement, "from __main__ import f%s" % (x + 1))
    r = t.repeat(3, 1000000)
    print(r, min(r))
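All four functions build the same string by different means, so the benchmark compares speed only; a quick equivalence check of those four concatenation styles:

```python
# three equal halves, as in the benchmark functions above
x = y = z = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" * 2

# augmented assignment, plain +, join and %-formatting all agree
assert x + y + z == "".join((x, y, z)) == "%s%s%s" % (x, y, z)
print(len(x + y + z))  # -> 156
```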

from __future__ import absolute_import, division, unicode_literals

import os
import unittest

try:
    unittest.TestCase.assertEqual
except AttributeError:
    unittest.TestCase.assertEqual = unittest.TestCase.assertEquals

from .support import get_data_files, TestData, test_dir, errorMessage
from html5lib import HTMLParser, inputstream


class Html5EncodingTestCase(unittest.TestCase):
    def test_codec_name_a(self):
        self.assertEqual(inputstream.codecName("utf-8"), "utf-8")

    def test_codec_name_b(self):
        self.assertEqual(inputstream.codecName("utf8"), "utf-8")

    def test_codec_name_c(self):
        self.assertEqual(inputstream.codecName(" utf8 "), "utf-8")

    def test_codec_name_d(self):
        self.assertEqual(inputstream.codecName("ISO_8859--1"), "windows-1252")


def runParserEncodingTest(data, encoding):
    p = HTMLParser()
    assert p.documentEncoding is None
    p.parse(data, useChardet=False)
    encoding = encoding.lower().decode("ascii")

    assert encoding == p.documentEncoding, errorMessage(data, encoding, p.documentEncoding)


def runPreScanEncodingTest(data, encoding):
    stream = inputstream.HTMLBinaryInputStream(data, chardet=False)
    encoding = encoding.lower().decode("ascii")

    # Very crude way to ignore irrelevant tests
    if len(data) > stream.numBytesMeta:
        return

    assert encoding == stream.charEncoding[0], errorMessage(data, encoding, stream.charEncoding[0])


def test_encoding():
    for filename in get_data_files("encoding"):
        tests = TestData(filename, b"data", encoding=None)
        for idx, test in enumerate(tests):
            yield (runParserEncodingTest, test[b'data'], test[b'encoding'])
            yield (runPreScanEncodingTest, test[b'data'], test[b'encoding'])

try:
    try:
        import charade  # flake8: noqa
    except ImportError:
        import chardet  # flake8: noqa
except ImportError:
    print("charade/chardet not found, skipping chardet tests")
else:
    def test_chardet():
        with open(os.path.join(test_dir, "encoding", "chardet", "test_big5.txt"), "rb") as fp:
            encoding = inputstream.HTMLInputStream(fp.read()).charEncoding
            assert encoding[0].lower() == "big5"

from __future__ import absolute_import, division, unicode_literals

import os
import sys
import traceback
import warnings
import re

warnings.simplefilter("error")

from .support import get_data_files
from .support import TestData, convert, convertExpected, treeTypes
from html5lib import html5parser, constants

# Run the parse error checks
checkParseErrors = False

# XXX - There should just be one function here but for some reason the testcase
# format differs from the treedump format by a single space character
def convertTreeDump(data):
    return "\n".join(convert(3)(data).split("\n")[1:])

namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub


def runParserTest(innerHTML, input, expected, errors, treeClass,
                  namespaceHTMLElements):
    with warnings.catch_warnings(record=True) as caughtWarnings:
        warnings.simplefilter("always")
        p = html5parser.HTMLParser(tree=treeClass,
                                   namespaceHTMLElements=namespaceHTMLElements)
        try:
            if innerHTML:
                document = p.parseFragment(input, innerHTML)
            else:
                document = p.parse(input)
        except:
            errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
                                  "\nTraceback:", traceback.format_exc()])
            assert False, errorMsg

    otherWarnings = [x for x in caughtWarnings
                     if not issubclass(x.category, constants.DataLossWarning)]
    assert len(otherWarnings) == 0, [(x.category, x.message) for x in otherWarnings]
    if len(caughtWarnings):
        return

    output = convertTreeDump(p.tree.testSerializer(document))

    expected = convertExpected(expected)
    if namespaceHTMLElements:
        expected = namespaceExpected(r"\1<html \2>", expected)

    errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
                          "\nReceived:", output])
    assert expected == output, errorMsg

    errStr = []
    for (line, col), errorcode, datavars in p.errors:
        assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars))
        errStr.append("Line: %i Col: %i %s" % (line, col,
                                               constants.E[errorcode] % datavars))

    errorMsg2 = "\n".join(["\n\nInput:", input,
                           "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
                           "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr)])
    if checkParseErrors:
        assert len(p.errors) == len(errors), errorMsg2


def test_parser():
    sys.stderr.write('Testing tree builders ' + " ".join(list(treeTypes.keys())) + "\n")
    files = get_data_files('tree-construction')

    for filename in files:
        testName = os.path.basename(filename).replace(".dat", "")
        if testName in ("template",):
            continue

        tests = TestData(filename, "data")

        for index, test in enumerate(tests):
            input, errors, innerHTML, expected = [test[key] for key in
                                                  ('data', 'errors',
                                                   'document-fragment',
                                                   'document')]
            if errors:
                errors = errors.split("\n")

            for treeName, treeCls in treeTypes.items():
                for namespaceHTMLElements in (True, False):
                    yield (runParserTest, innerHTML, input, expected, errors, treeCls,
                           namespaceHTMLElements)

from __future__ import absolute_import, division, unicode_literals

import io

from . import support  # flake8: noqa
from html5lib import html5parser
from html5lib.constants import namespaces
from html5lib import treebuilders

import unittest


# tests that aren't autogenerated from text files
class MoreParserTests(unittest.TestCase):

    def setUp(self):
        self.dom_tree = treebuilders.getTreeBuilder("dom")

    def test_assertDoctypeCloneable(self):
        parser = html5parser.HTMLParser(tree=self.dom_tree)
        doc = parser.parse('<!DOCTYPE HTML>')
        self.assertTrue(doc.cloneNode(True))

    def test_line_counter(self):
        # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
        parser = html5parser.HTMLParser(tree=self.dom_tree)
        parser.parse("<pre>\nx\n&gt;\n</pre>")

    def test_namespace_html_elements_0_dom(self):
        parser = html5parser.HTMLParser(tree=self.dom_tree, namespaceHTMLElements=True)
        doc = parser.parse("<html></html>")
        self.assertTrue(doc.childNodes[0].namespaceURI == namespaces["html"])

    def test_namespace_html_elements_1_dom(self):
        parser = html5parser.HTMLParser(tree=self.dom_tree, namespaceHTMLElements=False)
        doc = parser.parse("<html></html>")
        self.assertTrue(doc.childNodes[0].namespaceURI is None)

    def test_namespace_html_elements_0_etree(self):
        parser = html5parser.HTMLParser(namespaceHTMLElements=True)
        doc = parser.parse("<html></html>")
        self.assertTrue(list(doc)[0].tag == "{%s}html" % (namespaces["html"],))

    def test_namespace_html_elements_1_etree(self):
        parser = html5parser.HTMLParser(namespaceHTMLElements=False)
        doc = parser.parse("<html></html>")
        self.assertTrue(list(doc)[0].tag == "html")

    def test_unicode_file(self):
        parser = html5parser.HTMLParser()
        parser.parse(io.StringIO("a"))


def buildTestSuite():
    return unittest.defaultTestLoader.loadTestsFromName(__name__)


def main():
    buildTestSuite()
    unittest.main()

if __name__ == '__main__':
    main()

from __future__ import absolute_import, division, unicode_literals

try:
    import json
except ImportError:
    import simplejson as json

from html5lib import html5parser, sanitizer, constants, treebuilders


def toxmlFactory():
    tree = treebuilders.getTreeBuilder("etree")

    def toxml(element):
        # encode/decode roundtrip required for Python 2.6 compatibility
        result_bytes = tree.implementation.tostring(element, encoding="utf-8")
        return result_bytes.decode("utf-8")

    return toxml


def runSanitizerTest(name, expected, input, toxml=None):
    if toxml is None:
        toxml = toxmlFactory()
    expected = ''.join([toxml(token) for token in html5parser.HTMLParser().
                        parseFragment(expected)])
    expected = json.loads(json.dumps(expected))
    assert expected == sanitize_html(input)


def sanitize_html(stream, toxml=None):
    if toxml is None:
        toxml = toxmlFactory()
    return ''.join([toxml(token) for token in
                    html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
                    parseFragment(stream)])


def test_should_handle_astral_plane_characters():
    assert '<html:p xmlns:html="http://www.w3.org/1999/xhtml">\U0001d4b5 \U0001d538</html:p>' == sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")


def test_sanitizer():
    toxml = toxmlFactory()
    for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
        if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
            continue  # TODO
        if tag_name != tag_name.lower():
            continue  # TODO
        if tag_name == 'image':
            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
                   "<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
                   toxml)
        elif tag_name == 'br':
            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
                   "<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
                   toxml)
        elif tag_name in constants.voidElements:
            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
                   "<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
                   toxml)
        else:
            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
                   "<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
                   "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
                   toxml)

    for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
        tag_name = tag_name.upper()
        yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name,
               "&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name, tag_name),
               "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name),
               toxml)

    for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
        if attribute_name != attribute_name.lower():
            continue  # TODO
        if attribute_name == 'style':
            continue
        yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
               "<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
               "<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name,
               toxml)

    for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
        attribute_name = attribute_name.upper()
        yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name,
               "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
               "<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name,
               toxml)

    for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
        yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol,
               "<a href=\"%s\">foo</a>" % protocol,
               """<a href="%s">foo</a>""" % protocol,
               toxml)

    for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
        yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
               "<a href=\"%s\">foo</a>" % protocol,
               """<a href="%s">foo</a>""" % protocol,
               toxml)

from __future__ import absolute_import, division, unicode_literals

import json
import unittest

from .support import get_data_files

try:
    unittest.TestCase.assertEqual
except AttributeError:
    unittest.TestCase.assertEqual = unittest.TestCase.assertEquals

import html5lib
from html5lib import constants
from html5lib.serializer import HTMLSerializer, serialize
from html5lib.treewalkers._base import TreeWalker

optionals_loaded = []

try:
    from lxml import etree
    optionals_loaded.append("lxml")
except ImportError:
    pass

default_namespace = constants.namespaces["html"]


class JsonWalker(TreeWalker):
    def __iter__(self):
        for token in self.tree:
            type = token[0]
            if type == "StartTag":
                if len(token) == 4:
                    namespace, name, attrib = token[1:4]
                else:
                    namespace = default_namespace
                    name, attrib = token[1:3]
                yield self.startTag(namespace, name, self._convertAttrib(attrib))
            elif type == "EndTag":
                if len(token) == 3:
                    namespace, name = token[1:3]
                else:
                    namespace = default_namespace
                    name = token[1]
                yield self.endTag(namespace, name)
            elif type == "EmptyTag":
                if len(token) == 4:
                    namespace, name, attrib = token[1:]
                else:
                    namespace = default_namespace
                    name, attrib = token[1:]
                for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)):
                    yield token
            elif type == "Comment":
                yield self.comment(token[1])
            elif type in ("Characters", "SpaceCharacters"):
                for token in self.text(token[1]):
                    yield token
            elif type == "Doctype":
                if len(token) == 4:
                    yield self.doctype(token[1], token[2], token[3])
                elif len(token) == 3:
                    yield self.doctype(token[1], token[2])
                else:
                    yield self.doctype(token[1])
            else:
                raise ValueError("Unknown token type: " + type)

    def _convertAttrib(self, attribs):
        """html5lib tree-walkers use a dict of (namespace, name): value for
        attributes, but JSON cannot represent this. Convert from the format
        in the serializer tests (a list of dicts with "namespace", "name",
        and "value" as keys) to html5lib's tree-walker format."""
        attrs = {}
        for attrib in attribs:
            name = (attrib["namespace"], attrib["name"])
            assert(name not in attrs)
            attrs[name] = attrib["value"]
        return attrs


def serialize_html(input, options):
    options = dict([(str(k), v) for k, v in options.items()])
    stream = JsonWalker(input)
    serializer = HTMLSerializer(alphabetical_attributes=True, **options)
    return serializer.render(stream, options.get("encoding", None))


def runSerializerTest(input, expected, options):
    encoding = options.get("encoding", None)

    if encoding:
        encode = lambda x: x.encode(encoding)
        expected = list(map(encode, expected))

    result = serialize_html(input, options)
    if len(expected) == 1:
        assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
    elif result not in expected:
        assert False, "Expected: %s, Received: %s" % (expected, result)


class EncodingTestCase(unittest.TestCase):
    def throwsWithLatin1(self, input):
        self.assertRaises(UnicodeEncodeError, serialize_html, input, {"encoding": "iso-8859-1"})

    def testDoctypeName(self):
        self.throwsWithLatin1([["Doctype", "\u0101"]])

    def testDoctypePublicId(self):
        self.throwsWithLatin1([["Doctype", "potato", "\u0101"]])

    def testDoctypeSystemId(self):
        self.throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]])

    def testCdataCharacters(self):
        runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]],
                          ["<style>&amacr;"], {"encoding": "iso-8859-1"})

    def testCharacters(self):
        runSerializerTest([["Characters", "\u0101"]],
                          ["&amacr;"], {"encoding": "iso-8859-1"})

    def testStartTagName(self):
        self.throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]])

    def testEmptyTagName(self):
        self.throwsWithLatin1([["EmptyTag", "http://www.w3.org/1999/xhtml", "\u0101", []]])

    def testAttributeName(self):
        self.throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]])

    def testAttributeValue(self):
        runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span",
                            [{"namespace": None, "name": "potato", "value": "\u0101"}]]],
                          ["<span potato=&amacr;>"], {"encoding": "iso-8859-1"})

    def testEndTagName(self):
        self.throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]])

    def testComment(self):
        self.throwsWithLatin1([["Comment", "\u0101"]])


if "lxml" in optionals_loaded:
    class LxmlTestCase(unittest.TestCase):
        def setUp(self):
            self.parser = etree.XMLParser(resolve_entities=False)
            self.treewalker = html5lib.getTreeWalker("lxml")
            self.serializer = HTMLSerializer()

        def testEntityReplacement(self):
            doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
            tree = etree.fromstring(doc, parser=self.parser).getroottree()
            result = serialize(tree, tree="lxml", omit_optional_tags=False)
            self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result)

        def testEntityXML(self):
            doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>"""
            tree = etree.fromstring(doc, parser=self.parser).getroottree()
            result = serialize(tree, tree="lxml", omit_optional_tags=False)
            self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""", result)

        def testEntityNoResolve(self):
            doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
            tree = etree.fromstring(doc, parser=self.parser).getroottree()
            result = serialize(tree, tree="lxml", omit_optional_tags=False,
                               resolve_entities=False)
            self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)


def test_serializer():
    for filename in get_data_files('serializer', '*.test'):
        with open(filename) as fp:
            tests = json.load(fp)
            for index, test in enumerate(tests['tests']):
                yield runSerializerTest, test["input"], test["expected"], test.get("options", {})
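test_serializer reads *.test JSON files containing a top-level "tests" list; a minimal in-memory fixture in that shape (hypothetical data, not taken from the real test suite) looks like:

```python
import json

# hypothetical fixture mirroring the on-disk *.test file shape:
# each entry has "input" (token list), "expected" and optional "options"
fixture = json.loads("""
{"tests": [
  {"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", []]],
   "expected": ["<span>"],
   "options": {}}
]}
""")

for test in fixture["tests"]:
    print(test["input"], test["expected"], test.get("options", {}))
```

Each entry maps directly onto the three arguments runSerializerTest receives.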

from __future__ import absolute_import, division, unicode_literals

from . import support  # flake8: noqa
import unittest
import codecs
from io import BytesIO

from six.moves import http_client

from html5lib.inputstream import (BufferedStream, HTMLInputStream,
                                  HTMLUnicodeInputStream, HTMLBinaryInputStream)


class BufferedStreamTest(unittest.TestCase):
    def test_basic(self):
        s = b"abc"
        fp = BufferedStream(BytesIO(s))
        read = fp.read(10)
        assert read == s

    def test_read_length(self):
        fp = BufferedStream(BytesIO(b"abcdef"))
        read1 = fp.read(1)
        assert read1 == b"a"
        read2 = fp.read(2)
        assert read2 == b"bc"
        read3 = fp.read(3)
        assert read3 == b"def"
        read4 = fp.read(4)
        assert read4 == b""

    def test_tell(self):
        fp = BufferedStream(BytesIO(b"abcdef"))
        read1 = fp.read(1)
        assert fp.tell() == 1
        read2 = fp.read(2)
        assert fp.tell() == 3
        read3 = fp.read(3)
        assert fp.tell() == 6
        read4 = fp.read(4)
        assert fp.tell() == 6

    def test_seek(self):
        fp = BufferedStream(BytesIO(b"abcdef"))
        read1 = fp.read(1)
        assert read1 == b"a"
        fp.seek(0)
        read2 = fp.read(1)
        assert read2 == b"a"
        read3 = fp.read(2)
        assert read3 == b"bc"
        fp.seek(2)
        read4 = fp.read(2)
        assert read4 == b"cd"
        fp.seek(4)
        read5 = fp.read(2)
        assert read5 == b"ef"

    def test_seek_tell(self):
        fp = BufferedStream(BytesIO(b"abcdef"))
        read1 = fp.read(1)
        assert fp.tell() == 1
        fp.seek(0)
        read2 = fp.read(1)
        assert fp.tell() == 1
        read3 = fp.read(2)
        assert fp.tell() == 3
        fp.seek(2)
        read4 = fp.read(2)
        assert fp.tell() == 4
        fp.seek(4)
        read5 = fp.read(2)
        assert fp.tell() == 6


class HTMLUnicodeInputStreamShortChunk(HTMLUnicodeInputStream):
    _defaultChunkSize = 2


class HTMLBinaryInputStreamShortChunk(HTMLBinaryInputStream):
    _defaultChunkSize = 2


class HTMLInputStreamTest(unittest.TestCase):

    def test_char_ascii(self):
        stream = HTMLInputStream(b"'", encoding='ascii')
        self.assertEqual(stream.charEncoding[0], 'ascii')
        self.assertEqual(stream.char(), "'")

    def test_char_utf8(self):
        stream = HTMLInputStream('\u2018'.encode('utf-8'), encoding='utf-8')
        self.assertEqual(stream.charEncoding[0], 'utf-8')
        self.assertEqual(stream.char(), '\u2018')

    def test_char_win1252(self):
        stream = HTMLInputStream("\xa9\xf1\u2019".encode('windows-1252'))
        self.assertEqual(stream.charEncoding[0], 'windows-1252')
        self.assertEqual(stream.char(), "\xa9")
        self.assertEqual(stream.char(), "\xf1")
        self.assertEqual(stream.char(), "\u2019")

    def test_bom(self):
        stream = HTMLInputStream(codecs.BOM_UTF8 + b"'")
        self.assertEqual(stream.charEncoding[0], 'utf-8')
        self.assertEqual(stream.char(), "'")

    def test_utf_16(self):
        stream = HTMLInputStream((' ' * 1025).encode('utf-16'))
        self.assertTrue(stream.charEncoding[0] in ['utf-16-le', 'utf-16-be'], stream.charEncoding)
        self.assertEqual(len(stream.charsUntil(' ', True)), 1025)

    def test_newlines(self):
        stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\r\nccc\rddddxe")
        self.assertEqual(stream.position(), (1, 0))
        self.assertEqual(stream.charsUntil('c'), "a\nbb\n")
        self.assertEqual(stream.position(), (3, 0))
        self.assertEqual(stream.charsUntil('x'), "ccc\ndddd")
        self.assertEqual(stream.position(), (4, 4))
        self.assertEqual(stream.charsUntil('e'), "x")
        self.assertEqual(stream.position(), (4, 5))

    def test_newlines2(self):
        size = HTMLUnicodeInputStream._defaultChunkSize
        stream = HTMLInputStream("\r" * size + "\n")
        self.assertEqual(stream.charsUntil('x'), "\n" * size)

    def test_position(self):
        stream = HTMLBinaryInputStreamShortChunk(codecs.BOM_UTF8 + b"a\nbb\nccc\nddde\nf\ngh")
        self.assertEqual(stream.position(), (1, 0))
        self.assertEqual(stream.charsUntil('c'), "a\nbb\n")
        self.assertEqual(stream.position(), (3, 0))
        stream.unget("\n")
        self.assertEqual(stream.position(), (2, 2))
        self.assertEqual(stream.charsUntil('c'), "\n")
        self.assertEqual(stream.position(), (3, 0))
        stream.unget("\n")
        self.assertEqual(stream.position(), (2, 2))
        self.assertEqual(stream.char(), "\n")
        self.assertEqual(stream.position(), (3, 0))
        self.assertEqual(stream.charsUntil('e'), "ccc\nddd")
        self.assertEqual(stream.position(), (4, 3))
        self.assertEqual(stream.charsUntil('h'), "e\nf\ng")
        self.assertEqual(stream.position(), (6, 1))

    def test_position2(self):
        stream = HTMLUnicodeInputStreamShortChunk("abc\nd")
        self.assertEqual(stream.position(), (1, 0))
        self.assertEqual(stream.char(), "a")
        self.assertEqual(stream.position(), (1, 1))
        self.assertEqual(stream.char(), "b")
        self.assertEqual(stream.position(), (1, 2))
        self.assertEqual(stream.char(), "c")
        self.assertEqual(stream.position(), (1, 3))
        self.assertEqual(stream.char(), "\n")
        self.assertEqual(stream.position(), (2, 0))
        self.assertEqual(stream.char(), "d")
        self.assertEqual(stream.position(), (2, 1))

    def test_python_issue_20007(self):
        """
        Make sure we have a work-around for Python bug #20007
        http://bugs.python.org/issue20007
        """
        class FakeSocket(object):
            def makefile(self, _mode, _bufsize=None):
                return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")

        source = http_client.HTTPResponse(FakeSocket())
        source.begin()
        stream = HTMLInputStream(source)
        self.assertEqual(stream.charsUntil(" "), "Text")


def buildTestSuite():
    return unittest.defaultTestLoader.loadTestsFromName(__name__)


def main():
    buildTestSuite()
    unittest.main()

if __name__ == '__main__':
    main()

from __future__ import absolute_import, division, unicode_literals
import os
import sys
import unittest
import warnings
from difflib import unified_diff
try:
unittest.TestCase.assertEqual
except AttributeError:
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
from .support import get_data_files, TestData, convertExpected
from html5lib import html5parser, treewalkers, treebuilders, constants
def PullDOMAdapter(node):
from xml.dom import Node
from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS
if node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
for childNode in node.childNodes:
for event in PullDOMAdapter(childNode):
yield event
elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
raise NotImplementedError("DOCTYPE nodes are not supported by PullDOM")
elif node.nodeType == Node.COMMENT_NODE:
yield COMMENT, node
elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
yield CHARACTERS, node
elif node.nodeType == Node.ELEMENT_NODE:
yield START_ELEMENT, node
for childNode in node.childNodes:
for event in PullDOMAdapter(childNode):
yield event
yield END_ELEMENT, node
else:
raise NotImplementedError("Node type not supported: " + str(node.nodeType))
treeTypes = {
"DOM": {"builder": treebuilders.getTreeBuilder("dom"),
"walker": treewalkers.getTreeWalker("dom")},
"PullDOM": {"builder": treebuilders.getTreeBuilder("dom"),
"adapter": PullDOMAdapter,
"walker": treewalkers.getTreeWalker("pulldom")},
}
# Try whichever etree implementations are available, from a list that is
# "supposed" to work
try:
import xml.etree.ElementTree as ElementTree
except ImportError:
pass
else:
treeTypes['ElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
try:
import xml.etree.cElementTree as ElementTree
except ImportError:
pass
else:
treeTypes['cElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
try:
import lxml.etree as ElementTree # flake8: noqa
except ImportError:
pass
else:
treeTypes['lxml_native'] = \
{"builder": treebuilders.getTreeBuilder("lxml"),
"walker": treewalkers.getTreeWalker("lxml")}
try:
from genshi.core import QName, Attrs
from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
except ImportError:
pass
else:
def GenshiAdapter(tree):
text = None
for token in treewalkers.getTreeWalker("dom")(tree):
type = token["type"]
if type in ("Characters", "SpaceCharacters"):
if text is None:
text = token["data"]
else:
text += token["data"]
elif text is not None:
yield TEXT, text, (None, -1, -1)
text = None
if type in ("StartTag", "EmptyTag"):
if token["namespace"]:
name = "{%s}%s" % (token["namespace"], token["name"])
else:
name = token["name"]
attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value)
for attr, value in token["data"].items()])
yield (START, (QName(name), attrs), (None, -1, -1))
if type == "EmptyTag":
type = "EndTag"
if type == "EndTag":
if token["namespace"]:
name = "{%s}%s" % (token["namespace"], token["name"])
else:
name = token["name"]
yield END, QName(name), (None, -1, -1)
elif type == "Comment":
yield COMMENT, token["data"], (None, -1, -1)
elif type == "Doctype":
yield DOCTYPE, (token["name"], token["publicId"],
token["systemId"]), (None, -1, -1)
else:
pass # FIXME: What to do?
if text is not None:
yield TEXT, text, (None, -1, -1)
treeTypes["genshi"] = \
{"builder": treebuilders.getTreeBuilder("dom"),
"adapter": GenshiAdapter,
"walker": treewalkers.getTreeWalker("genshi")}
def concatenateCharacterTokens(tokens):
charactersToken = None
for token in tokens:
type = token["type"]
if type in ("Characters", "SpaceCharacters"):
if charactersToken is None:
charactersToken = {"type": "Characters", "data": token["data"]}
else:
charactersToken["data"] += token["data"]
else:
if charactersToken is not None:
yield charactersToken
charactersToken = None
yield token
if charactersToken is not None:
yield charactersToken
def convertTokens(tokens):
output = []
indent = 0
for token in concatenateCharacterTokens(tokens):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
if (token["namespace"] and
token["namespace"] != constants.namespaces["html"]):
if token["namespace"] in constants.prefixes:
name = constants.prefixes[token["namespace"]]
else:
name = token["namespace"]
name += " " + token["name"]
else:
name = token["name"]
output.append("%s<%s>" % (" " * indent, name))
indent += 2
attrs = token["data"]
if attrs:
# TODO: Remove this if statement, attrs should always exist
for (namespace, name), value in sorted(attrs.items()):
if namespace:
if namespace in constants.prefixes:
outputname = constants.prefixes[namespace]
else:
outputname = namespace
outputname += " " + name
else:
outputname = name
output.append("%s%s=\"%s\"" % (" " * indent, outputname, value))
if type == "EmptyTag":
indent -= 2
elif type == "EndTag":
indent -= 2
elif type == "Comment":
output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
elif type == "Doctype":
if token["name"]:
if token["publicId"]:
output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
(" " * indent, token["name"],
token["publicId"],
token["systemId"] and token["systemId"] or ""))
elif token["systemId"]:
output.append("""%s<!DOCTYPE %s "" "%s">""" %
(" " * indent, token["name"],
token["systemId"]))
else:
output.append("%s<!DOCTYPE %s>" % (" " * indent,
token["name"]))
else:
output.append("%s<!DOCTYPE >" % (" " * indent,))
elif type in ("Characters", "SpaceCharacters"):
output.append("%s\"%s\"" % (" " * indent, token["data"]))
else:
pass # TODO: what to do with errors?
return "\n".join(output)
import re
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
def sortattrs(x):
lines = x.group(0).split("\n")
lines.sort()
return "\n".join(lines)
class TokenTestCase(unittest.TestCase):
def test_all_tokens(self):
expected = [
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'data': 'a', 'type': 'Characters'},
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'b', 'type': 'Characters'},
{'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
{'data': 'c', 'type': 'Characters'},
{'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
{'data': {}, 'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
]
for treeName, treeCls in treeTypes.items():
p = html5parser.HTMLParser(tree=treeCls["builder"])
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
document = treeCls.get("adapter", lambda x: x)(document)
output = treeCls["walker"](document)
for expectedToken, outputToken in zip(expected, output):
self.assertEqual(expectedToken, outputToken)
def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
warnings.resetwarnings()
warnings.simplefilter("error")
try:
p = html5parser.HTMLParser(tree=treeClass["builder"])
if innerHTML:
document = p.parseFragment(input, innerHTML)
else:
document = p.parse(input)
except constants.DataLossWarning:
# Ignore testcases we know we don't pass
return
document = treeClass.get("adapter", lambda x: x)(document)
try:
output = convertTokens(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
expected = attrlist.sub(sortattrs, convertExpected(expected))
diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()],
[line + "\n" for line in output.splitlines()],
"Expected", "Received"))
assert expected == output, "\n".join([
"", "Input:", input,
"", "Expected:", expected,
"", "Received:", output,
"", "Diff:", diff,
])
except NotImplementedError:
pass # Amnesty for those that confess...
def test_treewalker():
sys.stdout.write('Testing tree walkers ' + " ".join(list(treeTypes.keys())) + "\n")
for treeName, treeCls in treeTypes.items():
files = get_data_files('tree-construction')
for filename in files:
testName = os.path.basename(filename).replace(".dat", "")
if testName in ("template",):
continue
tests = TestData(filename, "data")
for index, test in enumerate(tests):
(input, errors,
innerHTML, expected) = [test[key] for key in ("data", "errors",
"document-fragment",
"document")]
errors = errors.split("\n")
yield runTreewalkerTest, innerHTML, input, expected, errors, treeCls
def set_attribute_on_first_child(docfrag, name, value, treeName):
"""naively sets an attribute on the first child of the document
fragment passed in"""
setter = {'ElementTree': lambda d: d[0].set,
'DOM': lambda d: d.firstChild.setAttribute}
setter['cElementTree'] = setter['ElementTree']
try:
setter.get(treeName, setter['DOM'])(docfrag)(name, value)
except AttributeError:
setter['ElementTree'](docfrag)(name, value)
def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
"""tests what happens when we add attributes to the intext"""
treeName, treeClass = tree
parser = html5parser.HTMLParser(tree=treeClass["builder"])
document = parser.parseFragment(intext)
for nom, val in attrs_to_add:
set_attribute_on_first_child(document, nom, val, treeName)
document = treeClass.get("adapter", lambda x: x)(document)
output = convertTokens(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
if output not in expected:
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
def test_treewalker_six_mix():
"""Str/Unicode mix. If str attrs added to tree"""
# On Python 2.x string literals are of type str. Unless, like this
# file, the programmer imports unicode_literals from __future__.
# In that case, string literals become objects of type unicode.
# This test simulates a Py2 user, modifying attributes on a document
# fragment but not using the u'' syntax nor importing unicode_literals
sm_tests = [
('<a href="http://example.com">Example</a>',
[(str('class'), str('test123'))],
'<a>\n class="test123"\n href="http://example.com"\n "Example"'),
('<link href="http://example.com/cow">',
[(str('rel'), str('alternate'))],
'<link>\n href="http://example.com/cow"\n rel="alternate"\n "Example"')
]
for tree in treeTypes.items():
for intext, attrs, expected in sm_tests:
yield runTreewalkerEditTest, intext, expected, attrs, tree


@@ -1,133 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
import unittest
from html5lib.filters.whitespace import Filter
from html5lib.constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)
try:
unittest.TestCase.assertEqual
except AttributeError:
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
class TestCase(unittest.TestCase):
def runTest(self, input, expected):
output = list(Filter(input))
errorMsg = "\n".join(["\n\nInput:", str(input),
"\nExpected:", str(expected),
"\nReceived:", str(output)])
self.assertEqual(output, expected, errorMsg)
def runTestUnmodifiedOutput(self, input):
self.runTest(input, input)
def testPhrasingElements(self):
self.runTestUnmodifiedOutput(
[{"type": "Characters", "data": "This is a "},
{"type": "StartTag", "name": "span", "data": []},
{"type": "Characters", "data": "phrase"},
{"type": "EndTag", "name": "span", "data": []},
{"type": "SpaceCharacters", "data": " "},
{"type": "Characters", "data": "with"},
{"type": "SpaceCharacters", "data": " "},
{"type": "StartTag", "name": "em", "data": []},
{"type": "Characters", "data": "emphasised text"},
{"type": "EndTag", "name": "em", "data": []},
{"type": "Characters", "data": " and an "},
{"type": "StartTag", "name": "img", "data": [["alt", "image"]]},
{"type": "Characters", "data": "."}])
def testLeadingWhitespace(self):
self.runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "Characters", "data": "foo"},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "SpaceCharacters", "data": " "},
{"type": "Characters", "data": "foo"},
{"type": "EndTag", "name": "p", "data": []}])
def testLeadingWhitespaceAsCharacters(self):
self.runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": spaceCharacters + "foo"},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": " foo"},
{"type": "EndTag", "name": "p", "data": []}])
def testTrailingWhitespace(self):
self.runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo"},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo"},
{"type": "SpaceCharacters", "data": " "},
{"type": "EndTag", "name": "p", "data": []}])
def testTrailingWhitespaceAsCharacters(self):
self.runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo "},
{"type": "EndTag", "name": "p", "data": []}])
def testWhitespace(self):
self.runTest(
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
{"type": "EndTag", "name": "p", "data": []}],
[{"type": "StartTag", "name": "p", "data": []},
{"type": "Characters", "data": "foo bar"},
{"type": "EndTag", "name": "p", "data": []}])
def testLeadingWhitespaceInPre(self):
self.runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "Characters", "data": "foo"},
{"type": "EndTag", "name": "pre", "data": []}])
def testLeadingWhitespaceAsCharactersInPre(self):
self.runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": spaceCharacters + "foo"},
{"type": "EndTag", "name": "pre", "data": []}])
def testTrailingWhitespaceInPre(self):
self.runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": "foo"},
{"type": "SpaceCharacters", "data": spaceCharacters},
{"type": "EndTag", "name": "pre", "data": []}])
def testTrailingWhitespaceAsCharactersInPre(self):
self.runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters},
{"type": "EndTag", "name": "pre", "data": []}])
def testWhitespaceInPre(self):
self.runTestUnmodifiedOutput(
[{"type": "StartTag", "name": "pre", "data": []},
{"type": "Characters", "data": "foo" + spaceCharacters + "bar"},
{"type": "EndTag", "name": "pre", "data": []}])
def buildTestSuite():
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == "__main__":
main()


@@ -1,76 +0,0 @@
"""A collection of modules for building different kinds of tree from
HTML documents.
To create a treebuilder for a new type of tree, you need to
implement several things:
1) A set of classes for various types of elements: Document, Doctype,
Comment, Element. These must implement the interface of
_base.treebuilders.Node (although comment nodes have a different
signature for their constructor, see treebuilders.etree.Comment)
Textual content may also be implemented as another node type, or not, as
your tree implementation requires.
2) A treebuilder object (called TreeBuilder by convention) that
inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
documentClass - the class to use for the bottommost node of a document
elementClass - the class to use for HTML Elements
commentClass - the class to use for comments
doctypeClass - the class to use for doctypes
It also has one required method:
getDocument - Returns the root node of the complete document tree
3) If you wish to run the unit tests, you must also create a
testSerializer method on your treebuilder which accepts a node and
returns a string containing the node and its children serialized according
to the format used in the unittests
"""
from __future__ import absolute_import, division, unicode_literals
from ..utils import default_etree
treeBuilderCache = {}
def getTreeBuilder(treeType, implementation=None, **kwargs):
"""Get a TreeBuilder class for various types of tree with built-in support
treeType - the name of the tree type required (case-insensitive). Supported
values are:
"dom" - A generic builder for DOM implementations, defaulting to
an xml.dom.minidom-based implementation.
"etree" - A generic builder for tree implementations exposing an
ElementTree-like interface, defaulting to
xml.etree.cElementTree if available and
xml.etree.ElementTree if not.
"lxml" - An etree-based builder for lxml.etree, handling
limitations of lxml's implementation.
implementation - (Currently applies to the "etree" and "dom" tree types). A
module implementing the tree type e.g.
xml.etree.ElementTree or xml.etree.cElementTree."""
treeType = treeType.lower()
if treeType not in treeBuilderCache:
if treeType == "dom":
from . import dom
# Come up with a sane default (pref. from the stdlib)
if implementation is None:
from xml.dom import minidom
implementation = minidom
# NEVER cache here, caching is done in the dom submodule
return dom.getDomModule(implementation, **kwargs).TreeBuilder
elif treeType == "lxml":
from . import etree_lxml
treeBuilderCache[treeType] = etree_lxml.TreeBuilder
elif treeType == "etree":
from . import etree
if implementation is None:
implementation = default_etree
# NEVER cache here, caching is done in the etree submodule
return etree.getETreeModule(implementation, **kwargs).TreeBuilder
else:
raise ValueError("""Unrecognised treebuilder "%s" """ % treeType)
return treeBuilderCache.get(treeType)


@@ -1,57 +0,0 @@
"""A collection of modules for iterating through different kinds of
tree, generating tokens identical to those produced by the tokenizer
module.
To create a tree walker for a new type of tree, you need to
implement a tree walker object (called TreeWalker by convention) that
implements a 'serialize' method taking a tree as sole argument and
returning an iterator generating tokens.
"""
from __future__ import absolute_import, division, unicode_literals
import sys
from ..utils import default_etree
treeWalkerCache = {}
def getTreeWalker(treeType, implementation=None, **kwargs):
"""Get a TreeWalker class for various types of tree with built-in support
treeType - the name of the tree type required (case-insensitive). Supported
values are:
"dom" - The xml.dom.minidom DOM implementation
"pulldom" - The xml.dom.pulldom event stream
"etree" - A generic walker for tree implementations exposing an
elementtree-like interface (known to work with
ElementTree, cElementTree and lxml.etree).
"lxml" - Optimized walker for lxml.etree
"genshi" - a Genshi stream
implementation - (Currently applies to the "etree" tree type only). A module
implementing the tree type e.g. xml.etree.ElementTree or
cElementTree."""
treeType = treeType.lower()
if treeType not in treeWalkerCache:
if treeType in ("dom", "pulldom"):
name = "%s.%s" % (__name__, treeType)
__import__(name)
mod = sys.modules[name]
treeWalkerCache[treeType] = mod.TreeWalker
elif treeType == "genshi":
from . import genshistream
treeWalkerCache[treeType] = genshistream.TreeWalker
elif treeType == "lxml":
from . import lxmletree
treeWalkerCache[treeType] = lxmletree.TreeWalker
elif treeType == "etree":
from . import etree
if implementation is None:
implementation = default_etree
# XXX: NEVER cache here, caching is done in the etree submodule
return etree.getETreeModule(implementation, **kwargs).TreeWalker
return treeWalkerCache.get(treeType)
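The docstring above says a walker's iterator must generate tokens identical to those produced by the tokenizer module. A hedged sketch of that token shape, walking a toy tuple-based tree (this is an illustration, not a real html5lib TreeWalker):

```python
# Toy walker: a tree node is (name, attrs, children), where children
# are strings or nested tuples; yields tokenizer-shaped dicts.
def walk(tree):
    name, attrs, children = tree
    yield {"type": "StartTag", "name": name, "data": attrs}
    for child in children:
        if isinstance(child, str):
            yield {"type": "Characters", "data": child}
        else:
            # recurse into nested elements, flattening their tokens
            for token in walk(child):
                yield token
    yield {"type": "EndTag", "name": name}
```

Because the walker emits the same dict stream the tokenizer would, the same filters and serializers can run over either source.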


@@ -1,63 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
from . import _base
from ..constants import voidElements
class TreeWalker(_base.TreeWalker):
def __iter__(self):
ignore_until = None
previous = None
for event in self.tree:
if previous is not None and \
(ignore_until is None or previous[1] is ignore_until):
if previous[1] is ignore_until:
ignore_until = None
for token in self.tokens(previous, event):
yield token
if token["type"] == "EmptyTag":
ignore_until = previous[1]
previous = event
if ignore_until is None or previous[1] is ignore_until:
for token in self.tokens(previous, None):
yield token
elif ignore_until is not None:
raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
def tokens(self, event, next):
type, node = event
if type == START_ELEMENT:
name = node.nodeName
namespace = node.namespaceURI
attrs = {}
for attr in list(node.attributes.keys()):
attr = node.getAttributeNode(attr)
attrs[(attr.namespaceURI, attr.localName)] = attr.value
if name in voidElements:
for token in self.emptyTag(namespace,
name,
attrs,
not next or next[1] is not node):
yield token
else:
yield self.startTag(namespace, name, attrs)
elif type == END_ELEMENT:
name = node.nodeName
namespace = node.namespaceURI
if name not in voidElements:
yield self.endTag(namespace, name)
elif type == COMMENT:
yield self.comment(node.nodeValue)
elif type in (IGNORABLE_WHITESPACE, CHARACTERS):
for token in self.text(node.nodeValue):
yield token
else:
yield self.unknown(type)


@@ -1,16 +0,0 @@
#!/bin/bash -e
if [[ $USE_OPTIONAL != "true" && $USE_OPTIONAL != "false" ]]; then
echo "fatal: \$USE_OPTIONAL not set to true or false. Exiting."
exit 1
fi
pip install -r requirements-test.txt
if [[ $USE_OPTIONAL == "true" && $TRAVIS_PYTHON_VERSION != "pypy" ]]; then
if [[ $TRAVIS_PYTHON_VERSION == "2.6" ]]; then
pip install --allow-external Genshi --allow-insecure Genshi -r requirements-optional-2.6.txt
else
pip install --allow-external Genshi --allow-insecure Genshi -r requirements-optional-cpython.txt
fi
fi


@@ -1,5 +0,0 @@
-r requirements-optional-cpython.txt
# Can be used to force attributes to be serialized in alphabetical
# order.
ordereddict


@@ -1,5 +0,0 @@
-r requirements-optional.txt
# lxml is supported with its own treebuilder ("lxml") and otherwise
# uses the standard ElementTree support
lxml


@@ -1,13 +0,0 @@
-r requirements.txt
# We support a Genshi treewalker that can be used to serialize Genshi
# streams.
genshi
# DATrie can be used in place of our Python trie implementation for
# slightly better parsing performance.
datrie
# charade can be used as a fallback in case we are unable to determine
# the encoding of a document.
charade


@@ -1,5 +0,0 @@
-r requirements.txt
flake8
nose
ordereddict # Python 2.6


@@ -1,44 +0,0 @@
from distutils.core import setup
import os
import codecs
classifiers=[
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.2',
'Programming Language :: Python :: 3.3',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Text Processing :: Markup :: HTML'
]
packages = ['html5lib'] + ['html5lib.'+name
for name in os.listdir(os.path.join('html5lib'))
if os.path.isdir(os.path.join('html5lib', name)) and
not name.startswith('.') and name != 'tests']
current_dir = os.path.dirname(__file__)
with codecs.open(os.path.join(current_dir, 'README.rst'), 'r', 'utf8') as readme_file:
with codecs.open(os.path.join(current_dir, 'CHANGES.rst'), 'r', 'utf8') as changes_file:
long_description = readme_file.read() + '\n' + changes_file.read()
setup(name='html5lib',
version='0.9999-dev',
url='https://github.com/html5lib/html5lib-python',
license="MIT License",
description='HTML parser based on the WHATWG HTML specification',
long_description=long_description,
classifiers=classifiers,
maintainer='James Graham',
maintainer_email='james@hoppipolla.co.uk',
packages=packages,
install_requires=[
'six',
],
)


@@ -1,30 +0,0 @@
[tox]
envlist = py26,py27,py32,py33,py34,pypy
[testenv]
deps =
-r{toxinidir}/requirements-optional-cpython.txt
flake8
nose
commands =
{envbindir}/nosetests -q
{toxinidir}/flake8-run.sh
install_command =
pip install {opts} {packages}
[testenv:pypy]
# lxml doesn't work and datrie doesn't make sense
# (it's slower than the pure-python version)
deps =
charade
flake8
Genshi
nose
six
[testenv:py26]
basepython = python2.6
deps =
-r{toxinidir}/requirements-optional-2.6.txt
flake8
nose


@@ -1,24 +0,0 @@
#!/usr/bin/env python
import sys
import urllib.request, urllib.error, urllib.parse
import codecs
def main():
encodings = []
f = urllib.request.urlopen(sys.argv[1])
for line in f:
if line.startswith("Name: ") or line.startswith("Alias: "):
enc = line.split()[1]
try:
codecs.lookup(enc)
if enc.lower() not in encodings:  # call lower(); the bare bound method was always truthy
encodings.append(enc.lower())
except LookupError:
pass
sys.stdout.write("encodings = frozenset((\n")
for enc in encodings:
sys.stdout.write(' "%s",\n'%enc)
sys.stdout.write(' ))')
if __name__ == "__main__":
main()


@@ -1,122 +0,0 @@
#!/usr/bin/env python
"""Spider to try and find bugs in the parser. Requires httplib2 and elementtree
usage:
import spider
s = spider.Spider()
s.spider("http://www.google.com", maxURLs=100)
"""
import urllib.request, urllib.error, urllib.parse
import urllib.robotparser
import md5
import httplib2
import html5lib
from html5lib.treebuilders import etree
class Spider(object):
def __init__(self):
self.unvisitedURLs = set()
self.visitedURLs = set()
self.buggyURLs=set()
self.robotParser = urllib.robotparser.RobotFileParser()
self.contentDigest = {}
self.http = httplib2.Http(".cache")
def run(self, initialURL, maxURLs=1000):
urlNumber = 0
self.visitedURLs.add(initialURL)
content = self.loadURL(initialURL)
while maxURLs is None or urlNumber < maxURLs:
if content is not None:
self.parse(content)
urlNumber += 1
if not self.unvisitedURLs:
break
content = self.loadURL(self.unvisitedURLs.pop())
def parse(self, content):
failed = False
p = html5lib.HTMLParser(tree=etree.TreeBuilder)
try:
tree = p.parse(content)
except Exception:
self.buggyURLs.add(self.currentURL)
failed = True
print("BUGGY:", self.currentURL)
self.visitedURLs.add(self.currentURL)
if not failed:
self.updateURLs(tree)
def loadURL(self, url):
resp, content = self.http.request(url, "GET")
self.currentURL = url
digest = md5.md5(content).hexdigest()
if digest in self.contentDigest:
content = None
self.visitedURLs.add(url)
else:
self.contentDigest[digest] = url
if resp['status'] != "200":
content = None
return content
def updateURLs(self, tree):
"""Take all the links in the current document, extract the URLs and
update the list of visited and unvisited URLs according to whether we
have seen them before or not"""
urls = set()
#Remove all links we have already visited
for link in tree.findall(".//a"):
try:
url = urllib.parse.urldefrag(link.attrib['href'])[0]
if (url and url not in self.unvisitedURLs and url
not in self.visitedURLs):
urls.add(url)
except KeyError:
pass
#Remove all non-http URLs and add a suitable base URL where that is
#missing
newUrls = set()
for url in urls:
splitURL = list(urllib.parse.urlsplit(url))
if splitURL[0] != "http":
continue
if splitURL[1] == "":
splitURL[1] = urllib.parse.urlsplit(self.currentURL)[1]
newUrls.add(urllib.parse.urlunsplit(splitURL))
urls = newUrls
responseHeaders = {}
#Now we want to find the content types of the links we haven't visited
for url in urls:
try:
resp, content = self.http.request(url, "HEAD")
responseHeaders[url] = resp
except (AttributeError, KeyError):
#Don't know why this happens
pass
#Remove links not of content-type html or pages not found
#XXX - need to deal with other status codes?
toVisit = set([url for url in urls if url in responseHeaders and
"html" in responseHeaders[url]['content-type'] and
responseHeaders[url]['status'] == "200"])
#Now check we are allowed to spider the page
for url in toVisit:
robotURL = list(urllib.parse.urlsplit(url)[:2])
robotURL.extend(["robots.txt", "", ""])
robotURL = urllib.parse.urlunsplit(robotURL)
self.robotParser.set_url(robotURL)
if not self.robotParser.can_fetch("*", url):
toVisit.remove(url)
self.visitedURLs.update(urls)
self.unvisitedURLs.update(toVisit)


@@ -5,14 +5,15 @@ here = os.path.abspath(os.path.split(__file__)[0])
 repo_root = os.path.abspath(os.path.join(here, os.pardir))
 sys.path.insert(0, os.path.join(here))
-sys.path.insert(0, os.path.join(here, "six"))
-sys.path.insert(0, os.path.join(here, "html5lib"))
 sys.path.insert(0, os.path.join(here, "wptserve"))
 sys.path.insert(0, os.path.join(here, "pywebsocket"))
 sys.path.insert(0, os.path.join(here, "third_party", "attrs", "src"))
 sys.path.insert(0, os.path.join(here, "third_party", "funcsigs"))
+sys.path.insert(0, os.path.join(here, "third_party", "html5lib"))
 sys.path.insert(0, os.path.join(here, "third_party", "pluggy"))
 sys.path.insert(0, os.path.join(here, "third_party", "py"))
 sys.path.insert(0, os.path.join(here, "third_party", "pytest"))
+sys.path.insert(0, os.path.join(here, "third_party", "six"))
+sys.path.insert(0, os.path.join(here, "third_party", "webencodings"))
 sys.path.insert(0, os.path.join(here, "webdriver"))
 sys.path.insert(0, os.path.join(here, "wptrunner"))


@@ -1,9 +0,0 @@
*#
*.py[co]
*.sw[po]
*~
MANIFEST
documentation/_build
\#*
.tox
six.egg-info


@@ -1,8 +0,0 @@
syntax: glob
*.pyc
dist
MANIFEST
documentation/_build
.tox
.gitignore
six.egg-info


@@ -1,22 +0,0 @@
The primary author and maintainer of six is Benjamin Peterson. He would like to
acknowledge the following people who submitted bug reports, pull requests, and
otherwise worked to improve six:
Marc Abramowitz
Alexander Artemenko
Aymeric Augustin
Ned Batchelder
Jason R. Coombs
Julien Danjou
Ben Darnell
Ben Davis
Joshua Harlow
Anselm Kruis
Alexander Lukanin
James Mills
Sridhar Ratnakumar
Erik Rose
Peter Ruibal
Miroslav Shubernetskiy
If you think you belong on this list, please let me know! --Benjamin


@@ -1,16 +0,0 @@
Six is a Python 2 and 3 compatibility library. It provides utility functions
for smoothing over the differences between the Python versions with the goal of
writing Python code that is compatible with both Python versions. See the
documentation for more information on what is provided.
Six supports every Python version since 2.5. It is contained in only one Python
file, so it can be easily copied into your project. (The copyright and license
notice must be retained.)
Online documentation is at http://pythonhosted.org/six/.
Bugs can be reported to https://bitbucket.org/gutworth/six. The code can also
be found there.
For questions about six or porting in general, email the python-porting mailing
list: http://mail.python.org/mailman/listinfo/python-porting
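The README describes what six smooths over; a stdlib-only sketch of the kind of shim it centralises follows. The names mirror six's (`text_type`, `iteritems`), but this is an illustration, not six itself:

```python
import sys

# Pick compatibility names once, based on the running major version,
# instead of scattering version checks through the codebase.
PY3 = sys.version_info[0] >= 3

if PY3:
    text_type = str
    def iteritems(d):
        return iter(d.items())
else:
    text_type = unicode  # noqa: F821 (defined on Python 2 only)
    def iteritems(d):
        return d.iteritems()
```

Client code then imports `text_type` and `iteritems` from one place and runs unchanged on either major version, which is the design six packages up into a single module.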


@@ -1,2 +0,0 @@
[wheel]
universal = 1


@@ -1,32 +0,0 @@
from __future__ import with_statement
try:
from setuptools import setup
except ImportError:
from distutils.core import setup
import six
six_classifiers = [
"Programming Language :: Python :: 2",
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Topic :: Software Development :: Libraries",
"Topic :: Utilities",
]
with open("README", "r") as fp:
six_long_description = fp.read()
setup(name="six",
version=six.__version__,
author="Benjamin Peterson",
author_email="benjamin@python.org",
url="http://pypi.python.org/pypi/six/",
py_modules=["six"],
description="Python 2 and 3 compatibility utilities",
long_description=six_long_description,
license="MIT",
classifiers=six_classifiers
)


@@ -1,12 +0,0 @@
[tox]
envlist=py25,py26,py27,py31,py32,py33,py34,pypy
indexserver=
default = http://pypi.python.org/simple
testrun = http://pypi.testrun.org
[testenv]
deps=pytest
commands= py.test -rfsxX {posargs}
[pytest]
minversion=2.2.0


@@ -0,0 +1,31 @@
# To activate, change the Appveyor settings to use `.appveyor.yml`.
environment:
  global:
    PATH: "C:\\Python27\\Scripts\\;%PATH%"
    PYTEST_COMMAND: "coverage run -m pytest"
  matrix:
    - TOXENV: py27-base
    - TOXENV: py27-optional
    - TOXENV: py33-base
    - TOXENV: py33-optional
    - TOXENV: py34-base
    - TOXENV: py34-optional
    - TOXENV: py35-base
    - TOXENV: py35-optional
    - TOXENV: py36-base
    - TOXENV: py36-optional

install:
  - git submodule update --init --recursive
  - python -m pip install tox codecov

build: off

test_script:
  - tox

after_test:
  - python debug-info.py

on_success:
  - codecov

@@ -0,0 +1,8 @@
[run]
branch = True
source = html5lib

[paths]
source =
   html5lib
   .tox/*/lib/python*/site-packages/html5lib

@@ -0,0 +1,85 @@
# Copyright (c) 2014 GitHub, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
# Translations
*.mo
*.pot
# Django stuff:
*.log
# Sphinx documentation
doc/_build/
# PyBuilder
target/
# Generated by parse.py -p
stats.prof
# IDE
.idea

@@ -0,0 +1,21 @@
strictness: veryhigh

doc-warnings: false
test-warnings: false

max-line-length: 139

requirements:
  - requirements.txt
  - requirements-test.txt
  - requirements-optional.txt

ignore-paths:
  - parse.py
  - utils/

python-targets:
  - 2
  - 3

mccabe:
  run: false

testing/web-platform/tests/tools/third_party/html5lib/.pylintrc (vendored, new file)

@@ -0,0 +1,10 @@
[MASTER]
ignore=tests
[MESSAGES CONTROL]
# messages up to fixme should probably be fixed somehow
disable = redefined-builtin,attribute-defined-outside-init,anomalous-backslash-in-string,no-self-use,redefined-outer-name,bad-continuation,wrong-import-order,superfluous-parens,no-member,duplicate-code,super-init-not-called,abstract-method,property-on-old-class,wrong-import-position,no-name-in-module,no-init,bad-mcs-classmethod-argument,bad-classmethod-argument,fixme,invalid-name,import-error,too-few-public-methods,too-many-ancestors,too-many-arguments,too-many-boolean-expressions,too-many-branches,too-many-instance-attributes,too-many-locals,too-many-lines,too-many-public-methods,too-many-return-statements,too-many-statements,missing-docstring,line-too-long,locally-disabled,locally-enabled,bad-builtin,deprecated-lambda
[FORMAT]
max-line-length=139
single-line-if-stmt=no

(Diff between files not shown because of its large size.)

@@ -0,0 +1,32 @@
language: python
python:
  - "pypy"
  - "3.6"
  - "3.5"
  - "3.4"
  - "3.3"
  - "2.7"

sudo: false

cache: pip

env:
  global:
    - PYTEST_COMMAND="coverage run -m pytest"
  matrix:
    - TOXENV=optional
    - TOXENV=base
    - TOXENV=six19-optional

install:
  - pip install tox codecov

script:
  - tox

after_script:
  - python debug-info.py

after_success:
  - codecov

@@ -6,6 +6,7 @@ Credits
 - James Graham
 - Geoffrey Sneddon
 - Łukasz Langa
+- Will Kahn-Greene

 Patches and suggestions
@@ -16,19 +17,50 @@ Patches and suggestions
 - Lachlan Hunt
 - lantis63
 - Sam Ruby
-- Tim Fletcher
 - Thomas Broyer
+- Tim Fletcher
 - Mark Pilgrim
+- Philip Taylor
 - Ryan King
-- Philip Taylor
 - Edward Z. Yang
 - fantasai
 - Philip Jägenstedt
 - Ms2ger
+- Mohammad Taha Jahangir
 - Andy Wingo
 - Andreas Madsack
 - Karim Valiev
-- Mohammad Taha Jahangir
 - Juan Carlos Garcia Segovia
 - Mike West
 - Marc DM
+- Simon Sapin
+- Michael[tm] Smith
+- Ritwik Gupta
+- Marc Abramowitz
+- Tony Lopes
+- lilbludevil
+- Kevin
+- Drew Hubl
+- Austin Kumbera
+- Jim Baker
+- Jon Dufresne
+- Donald Stufft
+- Alex Gaynor
+- Nik Nyby
+- Jakub Wilk
+- Sigmund Cherem
+- Gabi Davar
+- Florian Mounier
+- neumond
+- Vitalik Verhovodov
+- Kovid Goyal
+- Adam Chainz
+- John Vandenberg
+- Eric Amorde
+- Benedikt Morbach
+- Jonathan Vanasco
+- Tom Most
+- Ville Skyttä
+- Hugo van Kemenade
+- Mark Vasilkov

@@ -0,0 +1,335 @@
Change Log
----------
1.0.1
~~~~~
Released on December 7, 2017
Breaking changes:
* Drop support for Python 2.6. (#330) (Thank you, Hugo, Will Kahn-Greene!)
* Remove ``utils/spider.py`` (#353) (Thank you, Jon Dufresne!)
Features:
* Improve documentation. (#300, #307) (Thank you, Jon Dufresne, Tom Most,
Will Kahn-Greene!)
* Add iframe seamless boolean attribute. (Thank you, Ritwik Gupta!)
* Add itemscope as a boolean attribute. (#194) (Thank you, Jonathan Vanasco!)
* Support Python 3.6. (#333) (Thank you, Jon Dufresne!)
* Add CI support for Windows using AppVeyor. (Thank you, John Vandenberg!)
* Improve testing and CI and add code coverage (#323, #334), (Thank you, Jon
Dufresne, John Vandenberg, Geoffrey Sneddon, Will Kahn-Greene!)
* Semver-compliant version number.
Bug fixes:
* Add support for setuptools < 18.5 to support environment markers. (Thank you,
John Vandenberg!)
* Add explicit dependency for six >= 1.9. (Thank you, Eric Amorde!)
* Fix regexes to work with Python 3.7 regex adjustments. (#318, #379) (Thank
you, Benedikt Morbach, Ville Skyttä, Mark Vasilkov!)
* Fix alphabeticalattributes filter namespace bug. (#324) (Thank you, Will
Kahn-Greene!)
* Include license file in generated wheel package. (#350) (Thank you, Jon
Dufresne!)
* Fix annotation-xml typo. (#339) (Thank you, Will Kahn-Greene!)
* Allow uppercase hex characters in CSS colour check. (#377) (Thank you,
Komal Dembla, Hugo!)
1.0
~~~
Released and unreleased on December 7, 2017. Badly packaged release.
0.999999999/1.0b10
~~~~~~~~~~~~~~~~~~
Released on July 15, 2016
* Fix attribute order going to the tree builder to be document order
instead of reverse document order(!).
0.99999999/1.0b9
~~~~~~~~~~~~~~~~
Released on July 14, 2016
* **Added ordereddict as a mandatory dependency on Python 2.6.**
* Added ``lxml``, ``genshi``, ``datrie``, ``charade``, and ``all``
extras that will do the right thing based on the specific
interpreter implementation.
* Now requires the ``mock`` package for the testsuite.
* Cease supporting DATrie under PyPy.
* **Remove PullDOM support, as this hasn't ever been properly
tested, doesn't entirely work, and as far as I can tell is
completely unused by anyone.**
* Move testsuite to ``py.test``.
* **Fix #124: move to webencodings for decoding the input byte stream;
this makes html5lib compliant with the Encoding Standard, and
introduces a required dependency on webencodings.**
* **Cease supporting Python 3.2 (in both CPython and PyPy forms).**
* **Fix comments containing double-dash with lxml 3.5 and above.**
* **Use scripting disabled by default (as we don't implement
scripting).**
* **Fix #11, avoiding the XSS bug potentially caused by serializer
allowing attribute values to be escaped out of in old browser versions,
changing the quote_attr_values option on serializer to take one of
three values, "always" (the old True value), "legacy" (the new option,
and the new default), and "spec" (the old False value, and the old
default).**
* **Fix #72 by rewriting the sanitizer to apply only to treewalkers
(instead of the tokenizer); as such, this will require amending all
callers of it to use it via the treewalker API.**
* **Drop support of charade, now that chardet is supported once more.**
* **Replace the charset keyword argument on parse and related methods
with a set of keyword arguments: override_encoding, transport_encoding,
same_origin_parent_encoding, likely_encoding, and default_encoding.**
* **Move filters._base, treebuilder._base, and treewalkers._base to .base
to clarify their status as public.**
* **Get rid of the sanitizer package. Merge sanitizer.sanitize into the
sanitizer.htmlsanitizer module and move that to sanitizer. This means
anyone who used sanitizer.sanitize or sanitizer.HTMLSanitizer needs no
code changes.**
* **Rename treewalkers.lxmletree to .etree_lxml and
treewalkers.genshistream to .genshi to have a consistent API.**
* Move a whole load of stuff (inputstream, ihatexml, trie, tokenizer,
utils) to be underscore prefixed to clarify their status as private.
0.9999999/1.0b8
~~~~~~~~~~~~~~~
Released on September 10, 2015
* Fix #195: fix the sanitizer to drop broken URLs (it threw an
exception between 0.9999 and 0.999999).
0.999999/1.0b7
~~~~~~~~~~~~~~
Released on July 7, 2015
* Fix #189: fix the sanitizer to allow relative URLs again (as it did
prior to 0.9999/1.0b5).
0.99999/1.0b6
~~~~~~~~~~~~~
Released on April 30, 2015
* Fix #188: fix the sanitizer to not throw an exception when sanitizing
bogus data URLs.
0.9999/1.0b5
~~~~~~~~~~~~
Released on April 29, 2015
* Fix #153: Sanitizer fails to treat some attributes as URLs. Despite how
this sounds, this has no known security implications. No known version
of IE (5.5 to current), Firefox (3 to current), Safari (6 to current),
Chrome (1 to current), or Opera (12 to current) will run any script
provided in these attributes.
* Pass error message to the ParseError exception in strict parsing mode.
* Allow data URIs in the sanitizer, with a whitelist of content-types.
* Add support for Python implementations that don't support lone
surrogates (read: Jython). Fixes #2.
* Remove localization of error messages. This functionality was totally
unused (and untested that everything was localizable), so we may as
well follow numerous browsers in not supporting translating technical
strings.
* Expose treewalkers.pprint as a public API.
* Add a documentEncoding property to HTML5Parser, fix #121.
0.999
~~~~~
Released on December 23, 2013
* Fix #127: add work-around for CPython issue #20007: .read(0) on
http.client.HTTPResponse drops the rest of the content.
* Fix #115: lxml treewalker can now deal with fragments containing, at
their root level, text nodes with non-ASCII characters on Python 2.
0.99
~~~~
Released on September 10, 2013
* No library changes from 1.0b3; released as 0.99 as pip has changed
behaviour from 1.4 to avoid installing pre-release versions per
PEP 440.
1.0b3
~~~~~
Released on July 24, 2013
* Removed ``RecursiveTreeWalker`` from ``treewalkers._base``. Any
implementation using it should be moved to
``NonRecursiveTreeWalker``, as everything bundled with html5lib has
for years.
* Fix #67 so that ``BufferedStream`` correctly returns a bytes
object, thereby fixing any case where html5lib is passed a
non-seekable RawIOBase-like object.
1.0b2
~~~~~
Released on June 27, 2013
* Removed reordering of attributes within the serializer. There is now
an ``alphabetical_attributes`` option which preserves the previous
behaviour through a new filter. This allows attribute order to be
preserved through html5lib if the tree builder preserves order.
* Removed ``dom2sax`` from DOM treebuilders. It has been replaced by
``treeadapters.sax.to_sax`` which is generic and supports any
treewalker; it also resolves all known bugs with ``dom2sax``.
* Fix treewalker assertions on hitting bytes strings on
Python 2. Previous to 1.0b1, treewalkers coped with mixed
bytes/unicode data on Python 2; this reintroduces this prior
behaviour on Python 2. Behaviour is unchanged on Python 3.
1.0b1
~~~~~
Released on May 17, 2013
* Implementation updated to implement the `HTML specification
<http://www.whatwg.org/specs/web-apps/current-work/>`_ as of 5th May
2013 (`SVN <http://svn.whatwg.org/webapps/>`_ revision r7867).
* Python 3.2+ supported in a single codebase using the ``six`` library.
* Removed support for Python 2.5 and older.
* Removed the deprecated Beautiful Soup 3 treebuilder.
``beautifulsoup4`` can use ``html5lib`` as a parser instead. Note that
since it doesn't support namespaces, foreign content like SVG and
MathML is parsed incorrectly.
* Removed ``simpletree`` from the package. The default tree builder is
now ``etree`` (using the ``xml.etree.cElementTree`` implementation if
available, and ``xml.etree.ElementTree`` otherwise).
* Removed the ``XHTMLSerializer`` as it never actually guaranteed its
output was well-formed XML, and hence provided little of use.
* Removed default DOM treebuilder, so ``html5lib.treebuilders.dom`` is no
longer supported. ``html5lib.treebuilders.getTreeBuilder("dom")`` will
return the default DOM treebuilder, which uses ``xml.dom.minidom``.
* Optional heuristic character encoding detection now based on
``charade`` for Python 2.6 - 3.3 compatibility.
* Optional ``Genshi`` treewalker support fixed.
* Many bugfixes, including:
* #33: null in attribute value breaks XML AttValue;
* #4: nested, indirect descendant, <button> causes infinite loop;
* `Google Code 215
<http://code.google.com/p/html5lib/issues/detail?id=215>`_: Properly
detect seekable streams;
* `Google Code 206
<http://code.google.com/p/html5lib/issues/detail?id=206>`_: add
support for <video preload=...>, <audio preload=...>;
* `Google Code 205
<http://code.google.com/p/html5lib/issues/detail?id=205>`_: add
support for <video poster=...>;
* `Google Code 202
<http://code.google.com/p/html5lib/issues/detail?id=202>`_: Unicode
file breaks InputStream.
* Source code is now mostly PEP 8 compliant.
* Test harness has been improved and now depends on ``nose``.
* Documentation updated and moved to https://html5lib.readthedocs.io/.
0.95
~~~~
Released on February 11, 2012
0.90
~~~~
Released on January 17, 2010
0.11.1
~~~~~~
Released on June 12, 2008
0.11
~~~~
Released on June 10, 2008
0.10
~~~~
Released on October 7, 2007
0.9
~~~
Released on March 11, 2007
0.2
~~~
Released on January 8, 2007

@@ -1,6 +1,10 @@
 include LICENSE
+include AUTHORS.rst
 include CHANGES.rst
 include README.rst
 include requirements*.txt
+include .pytest.expect
+include tox.ini
+include pytest.ini
 graft html5lib/tests/testdata
 recursive-include html5lib/tests *.py

@@ -51,7 +51,7 @@ pass into html5lib as follows:
   import html5lib

   with closing(urlopen("http://example.com/")) as f:
-      document = html5lib.parse(f, encoding=f.info().getparam("charset"))
+      document = html5lib.parse(f, transport_encoding=f.info().getparam("charset"))

 When using with ``urllib.request`` (Python 3), the charset from HTTP
 should be pass into html5lib as follows:
@@ -62,7 +62,7 @@ should be pass into html5lib as follows:
   import html5lib

   with urlopen("http://example.com/") as f:
-      document = html5lib.parse(f, encoding=f.info().get_content_charset())
+      document = html5lib.parse(f, transport_encoding=f.info().get_content_charset())

 To have more control over the parser, create a parser object explicitly.
 For instance, to make the parser raise exceptions on parse errors, use:
@@ -84,13 +84,13 @@ format:
   parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
   minidom_document = parser.parse("<p>Hello World!")

-More documentation is available at http://html5lib.readthedocs.org/.
+More documentation is available at https://html5lib.readthedocs.io/.

 Installation
 ------------

-html5lib works on CPython 2.6+, CPython 3.2+ and PyPy. To install it,
+html5lib works on CPython 2.7+, CPython 3.3+ and PyPy. To install it,
 use:

 .. code-block:: bash
@@ -104,8 +104,8 @@ Optional Dependencies
 The following third-party libraries may be used for additional
 functionality:

-- ``datrie`` can be used to improve parsing performance (though in
-  almost all cases the improvement is marginal);
+- ``datrie`` can be used under CPython to improve parsing performance
+  (though in almost all cases the improvement is marginal);

 - ``lxml`` is supported as a tree format (for both building and
   walking) under CPython (but *not* PyPy where it is known to cause
@@ -113,13 +113,8 @@ functionality:
 - ``genshi`` has a treewalker (but not builder); and

-- ``charade`` can be used as a fallback when character encoding cannot
-  be determined; ``chardet``, from which it was forked, can also be used
-  on Python 2.
-
-- ``ordereddict`` can be used under Python 2.6
-  (``collections.OrderedDict`` is used instead on later versions) to
-  serialize attributes in alphabetical order.
+- ``chardet`` can be used as a fallback when character encoding cannot
+  be determined.

 Bugs
@@ -132,9 +127,8 @@ Please report any bugs on the `issue tracker
 Tests
 -----

-Unit tests require the ``nose`` library and can be run using the
-``nosetests`` command in the root directory; ``ordereddict`` is
-required under Python 2.6. All should pass.
+Unit tests require the ``pytest`` and ``mock`` libraries and can be
+run using the ``py.test`` command in the root directory.

 Test data are contained in a separate `html5lib-tests
 <https://github.com/html5lib/html5lib-tests>`_ repository and included
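The ``transport_encoding`` value in the README examples above comes from the HTTP ``Content-Type`` header. Using only the standard library, the lookup that ``f.info().get_content_charset()`` performs on Python 3 can be sketched like this (the header value here is an illustrative example):

```python
from email.message import Message

# urlopen(...).info() returns a Message-like object holding the
# response headers; build one by hand for illustration.
headers = Message()
headers["Content-Type"] = 'text/html; charset=ISO-8859-1'

# get_content_charset() parses the MIME parameters and returns the
# charset name coerced to lower case (or None if absent).
charset = headers.get_content_charset()
print(charset)  # iso-8859-1
```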

@@ -12,7 +12,7 @@ info = {
     "maxsize": sys.maxsize
 }

-search_modules = ["charade", "chardet", "datrie", "genshi", "html5lib", "lxml", "six"]
+search_modules = ["chardet", "datrie", "genshi", "html5lib", "lxml", "six"]
 found_modules = []

 for m in search_modules:
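The probing loop in ``debug-info.py`` can be sketched with the standard library alone (module list taken from the diff above; the script's exact reporting logic is not reproduced here):

```python
import importlib

search_modules = ["chardet", "datrie", "genshi", "html5lib", "lxml", "six"]
found_modules = []

for name in search_modules:
    try:
        # The import succeeds only for packages present in the environment.
        importlib.import_module(name)
    except ImportError:
        continue
    found_modules.append(name)

print("found:", found_modules)
```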

@@ -126,7 +126,7 @@ html_theme = 'default'
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+#html_static_path = ['_static']

 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.

@@ -1,59 +1,58 @@
 filters Package
 ===============

-:mod:`_base` Module
--------------------
+:mod:`base` Module
+------------------

-.. automodule:: html5lib.filters._base
+.. automodule:: html5lib.filters.base
     :members:
-    :undoc-members:
     :show-inheritance:
+    :special-members: __init__

 :mod:`alphabeticalattributes` Module
 ------------------------------------

 .. automodule:: html5lib.filters.alphabeticalattributes
     :members:
-    :undoc-members:
     :show-inheritance:
+    :special-members: __init__

 :mod:`inject_meta_charset` Module
 ---------------------------------

 .. automodule:: html5lib.filters.inject_meta_charset
     :members:
-    :undoc-members:
     :show-inheritance:
+    :special-members: __init__

 :mod:`lint` Module
 ------------------

 .. automodule:: html5lib.filters.lint
     :members:
-    :undoc-members:
     :show-inheritance:
+    :special-members: __init__

 :mod:`optionaltags` Module
 --------------------------

 .. automodule:: html5lib.filters.optionaltags
     :members:
-    :undoc-members:
     :show-inheritance:
+    :special-members: __init__

 :mod:`sanitizer` Module
 -----------------------

 .. automodule:: html5lib.filters.sanitizer
     :members:
-    :undoc-members:
     :show-inheritance:
+    :special-members: __init__

 :mod:`whitespace` Module
 ------------------------

 .. automodule:: html5lib.filters.whitespace
     :members:
-    :undoc-members:
     :show-inheritance:
+    :special-members: __init__

@@ -0,0 +1,38 @@
html5lib Package
================

.. automodule:: html5lib
    :members: __version__

:mod:`constants` Module
-----------------------

.. automodule:: html5lib.constants
    :members:
    :show-inheritance:

:mod:`html5parser` Module
-------------------------

.. automodule:: html5lib.html5parser
    :members:
    :show-inheritance:
    :special-members: __init__

:mod:`serializer` Module
------------------------

.. automodule:: html5lib.serializer
    :members:
    :show-inheritance:
    :special-members: __init__

Subpackages
-----------

.. toctree::

    html5lib.filters
    html5lib.treebuilders
    html5lib.treewalkers
    html5lib.treeadapters

@@ -0,0 +1,20 @@
treeadapters Package
====================

:mod:`~html5lib.treeadapters` Package
-------------------------------------

.. automodule:: html5lib.treeadapters
    :members:
    :show-inheritance:
    :special-members: __init__

.. automodule:: html5lib.treeadapters.genshi
    :members:
    :show-inheritance:
    :special-members: __init__

.. automodule:: html5lib.treeadapters.sax
    :members:
    :show-inheritance:
    :special-members: __init__

@@ -6,38 +6,37 @@ treebuilders Package
 .. automodule:: html5lib.treebuilders
     :members:
-    :undoc-members:
     :show-inheritance:
+    :special-members: __init__

-:mod:`_base` Module
--------------------
+:mod:`base` Module
+------------------

-.. automodule:: html5lib.treebuilders._base
+.. automodule:: html5lib.treebuilders.base
     :members:
-    :undoc-members:
     :show-inheritance:
+    :special-members: __init__

 :mod:`dom` Module
 -----------------

 .. automodule:: html5lib.treebuilders.dom
     :members:
-    :undoc-members:
     :show-inheritance:
+    :special-members: __init__

 :mod:`etree` Module
 -------------------

 .. automodule:: html5lib.treebuilders.etree
     :members:
-    :undoc-members:
     :show-inheritance:
+    :special-members: __init__

 :mod:`etree_lxml` Module
 ------------------------

 .. automodule:: html5lib.treebuilders.etree_lxml
     :members:
-    :undoc-members:
     :show-inheritance:
+    :special-members: __init__

@@ -0,0 +1,50 @@
treewalkers Package
===================

:mod:`treewalkers` Package
--------------------------

.. automodule:: html5lib.treewalkers
    :members:
    :show-inheritance:
    :special-members: __init__

:mod:`base` Module
------------------

.. automodule:: html5lib.treewalkers.base
    :members:
    :show-inheritance:
    :special-members: __init__

:mod:`dom` Module
-----------------

.. automodule:: html5lib.treewalkers.dom
    :members:
    :show-inheritance:
    :special-members: __init__

:mod:`etree` Module
-------------------

.. automodule:: html5lib.treewalkers.etree
    :members:
    :show-inheritance:
    :special-members: __init__

:mod:`etree_lxml` Module
------------------------

.. automodule:: html5lib.treewalkers.etree_lxml
    :members:
    :show-inheritance:
    :special-members: __init__

:mod:`genshi` Module
--------------------

.. automodule:: html5lib.treewalkers.genshi
    :members:
    :show-inheritance:
    :special-members: __init__

@@ -8,6 +8,7 @@ Overview
    :maxdepth: 2

    movingparts
+   modules
    changes
    License <license>

@@ -4,22 +4,25 @@ The moving parts
 html5lib consists of a number of components, which are responsible for
 handling its features.

+Parsing uses a *tree builder* to generate a *tree*, the in-memory representation of the document.
+Several tree representations are supported, as are translations to other formats via *tree adapters*.
+The tree may be translated to a token stream with a *tree walker*, from which :class:`~html5lib.serializer.HTMLSerializer` produces a stream of bytes.
+The token stream may also be transformed by use of *filters* to accomplish tasks like sanitization.
+
 Tree builders
 -------------

 The parser reads HTML by tokenizing the content and building a tree that
-the user can later access. There are three main types of trees that
-html5lib can build:
+the user can later access. html5lib can build three types of trees:

-* ``etree`` - this is the default; builds a tree based on ``xml.etree``,
+* ``etree`` - this is the default; builds a tree based on :mod:`xml.etree`,
   which can be found in the standard library. Whenever possible, the
   accelerated ``ElementTree`` implementation (i.e.
   ``xml.etree.cElementTree`` on Python 2.x) is used.

-* ``dom`` - builds a tree based on ``xml.dom.minidom``.
+* ``dom`` - builds a tree based on :mod:`xml.dom.minidom`.

-* ``lxml.etree`` - uses lxml's implementation of the ``ElementTree``
+* ``lxml`` - uses the :mod:`lxml.etree` implementation of the ``ElementTree``
   API. The performance gains are relatively small compared to using the
   accelerated ``ElementTree`` module.
@@ -31,21 +34,15 @@ You can specify the builder by name when using the shorthand API:
   with open("mydocument.html", "rb") as f:
       lxml_etree_document = html5lib.parse(f, treebuilder="lxml")

-When instantiating a parser object, you have to pass a tree builder
-class in the ``tree`` keyword attribute:
+To get a builder class by name, use the :func:`~html5lib.treebuilders.getTreeBuilder` function.
+
+When instantiating a :class:`~html5lib.html5parser.HTMLParser` object, you must pass a tree builder class via the ``tree`` keyword attribute:

 .. code-block:: python

   import html5lib
-  parser = html5lib.HTMLParser(tree=SomeTreeBuilder)
-  document = parser.parse("<p>Hello World!")
-
-To get a builder class by name, use the ``getTreeBuilder`` function:
-
-.. code-block:: python
-
-  import html5lib
-  parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
+  TreeBuilder = html5lib.getTreeBuilder("dom")
+  parser = html5lib.HTMLParser(tree=TreeBuilder)
   minidom_document = parser.parse("<p>Hello World!")

 The implementation of builders can be found in `html5lib/treebuilders/
@@ -55,17 +52,13 @@ The implementation of builders can be found in `html5lib/treebuilders/
 Tree walkers
 ------------

-Once a tree is ready, you can work on it either manually, or using
-a tree walker, which provides a streaming view of the tree. html5lib
-provides walkers for all three supported types of trees (``etree``,
-``dom`` and ``lxml``).
+In addition to manipulating a tree directly, you can use a tree walker to generate a streaming view of it.
+html5lib provides walkers for ``etree``, ``dom``, and ``lxml`` trees, as well as ``genshi`` `markup streams <https://genshi.edgewall.org/wiki/Documentation/streams.html>`_.

 The implementation of walkers can be found in `html5lib/treewalkers/
 <https://github.com/html5lib/html5lib-python/tree/master/html5lib/treewalkers>`_.

-Walkers make consuming HTML easier. html5lib uses them to provide you
-with has a couple of handy tools.
+html5lib provides :class:`~html5lib.serializer.HTMLSerializer` for generating a stream of bytes from a token stream, and several filters which manipulate the stream.

 HTMLSerializer
 ~~~~~~~~~~~~~~
@@ -90,15 +83,14 @@ The serializer lets you write HTML back as a stream of bytes.
    '>'
    'Witam wszystkich'

-You can customize the serializer behaviour in a variety of ways, consult
-the :class:`~html5lib.serializer.htmlserializer.HTMLSerializer`
-documentation.
+You can customize the serializer behaviour in a variety of ways. Consult
+the :class:`~html5lib.serializer.HTMLSerializer` documentation.
 Filters
 ~~~~~~~

-You can alter the stream content with filters provided by html5lib:
+html5lib provides several filters:

 * :class:`alphabeticalattributes.Filter
   <html5lib.filters.alphabeticalattributes.Filter>` sorts attributes on
@@ -110,11 +102,11 @@ You can alter the stream content with filters provided by html5lib:
   the document

 * :class:`lint.Filter <html5lib.filters.lint.Filter>` raises
-  ``LintError`` exceptions on invalid tag and attribute names, invalid
+  :exc:`AssertionError` exceptions on invalid tag and attribute names, invalid
   PCDATA, etc.

 * :class:`optionaltags.Filter <html5lib.filters.optionaltags.Filter>`
-  removes tags from the stream which are not necessary to produce valid
+  removes tags from the token stream which are not necessary to produce valid
   HTML

 * :class:`sanitizer.Filter <html5lib.filters.sanitizer.Filter>` removes
@@ -125,9 +117,9 @@ You can alter the stream content with filters provided by html5lib:
 * :class:`whitespace.Filter <html5lib.filters.whitespace.Filter>`
   collapses all whitespace characters to single spaces unless they're in
-  ``<pre/>`` or ``textarea`` tags.
+  ``<pre/>`` or ``<textarea/>`` tags.

-To use a filter, simply wrap it around a stream:
+To use a filter, simply wrap it around a token stream:

 .. code-block:: python
@ -136,15 +128,17 @@ To use a filter, simply wrap it around a stream:
>>> dom = html5lib.parse("<p><script>alert('Boo!')", treebuilder="dom") >>> dom = html5lib.parse("<p><script>alert('Boo!')", treebuilder="dom")
>>> walker = html5lib.getTreeWalker("dom") >>> walker = html5lib.getTreeWalker("dom")
>>> stream = walker(dom) >>> stream = walker(dom)
>>> sane_stream = sanitizer.Filter(stream) clean_stream = sanitizer.Filter(stream) >>> clean_stream = sanitizer.Filter(stream)
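A filter of this kind is just a generator wrapped around a token stream. As a stdlib-only sketch (the token-dict shape mirrors html5lib's tree-walker tokens, but this is not the library's own code), a whitespace-collapsing filter in the spirit of `whitespace.Filter` might look like:

```python
import re

def collapse_whitespace(tokens):
    # Yield each token unchanged, except that runs of whitespace in
    # character tokens are collapsed to a single space (a sketch of
    # what html5lib.filters.whitespace.Filter does for most elements).
    for token in tokens:
        if token.get("type") == "Characters":
            token = dict(token, data=re.sub(r"[ \t\r\n\x0c]+", " ", token["data"]))
        yield token

# A hand-built token stream standing in for walker(dom) output.
stream = [{"type": "StartTag", "name": "p", "data": {}},
          {"type": "Characters", "data": "Witam \n  wszystkich"},
          {"type": "EndTag", "name": "p"}]
filtered = list(collapse_whitespace(stream))
```

Because filters share this generator shape, they can be stacked by wrapping one around another before handing the stream to a serializer.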
Tree adapters Tree adapters
------------- -------------
Used to translate one type of tree to another. More documentation Tree adapters can be used to translate between tree formats.
pending, sorry. Two adapters are provided by html5lib:
* :func:`html5lib.treeadapters.genshi.to_genshi()` generates a `Genshi markup stream <https://genshi.edgewall.org/wiki/Documentation/streams.html>`_.
* :func:`html5lib.treeadapters.sax.to_sax()` calls a SAX handler based on the tree.
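The SAX adapter idea can be illustrated with the standard library alone. This hypothetical `dom_to_sax` walks a minidom tree and fires handler events in the same start/characters/end pattern that `to_sax` produces (html5lib's real adapter consumes a tree walker rather than recursing over DOM nodes):

```python
from xml.dom.minidom import parseString
from xml.sax.handler import ContentHandler

def dom_to_sax(node, handler):
    # Translate a DOM subtree into SAX events (illustrative only).
    if node.nodeType == node.TEXT_NODE:
        handler.characters(node.data)
    elif node.nodeType == node.ELEMENT_NODE:
        handler.startElement(node.tagName, dict(node.attributes.items()))
        for child in node.childNodes:
            dom_to_sax(child, handler)
        handler.endElement(node.tagName)
    else:  # document and other container nodes: just descend
        for child in node.childNodes:
            dom_to_sax(child, handler)

class Recorder(ContentHandler):
    # Collects events so the stream can be inspected.
    def __init__(self):
        ContentHandler.__init__(self)
        self.events = []
    def startElement(self, name, attrs):
        self.events.append(("start", name))
    def endElement(self, name):
        self.events.append(("end", name))
    def characters(self, content):
        self.events.append(("chars", content))

recorder = Recorder()
dom_to_sax(parseString("<p>Witam</p>"), recorder)
```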
Encoding discovery Encoding discovery
------------------ ------------------
@ -156,54 +150,16 @@ the following way:
* The encoding may be explicitly specified by passing the name of the * The encoding may be explicitly specified by passing the name of the
encoding as the encoding parameter to the encoding as the encoding parameter to the
:meth:`~html5lib.html5parser.HTMLParser.parse` method on :meth:`~html5lib.html5parser.HTMLParser.parse` method on
``HTMLParser`` objects. :class:`~html5lib.html5parser.HTMLParser` objects.
* If no encoding is specified, the parser will attempt to detect the * If no encoding is specified, the parser will attempt to detect the
encoding from a ``<meta>`` element in the first 512 bytes of the encoding from a ``<meta>`` element in the first 512 bytes of the
document (this is only a partial implementation of the current HTML document (this is only a partial implementation of the current HTML
5 specification). specification).
* If no encoding can be found and the chardet library is available, an * If no encoding can be found and the :mod:`chardet` library is available, an
attempt will be made to sniff the encoding from the byte pattern. attempt will be made to sniff the encoding from the byte pattern.
* If all else fails, the default encoding will be used. This is usually * If all else fails, the default encoding will be used. This is usually
`Windows-1252 <http://en.wikipedia.org/wiki/Windows-1252>`_, which is `Windows-1252 <http://en.wikipedia.org/wiki/Windows-1252>`_, which is
a common fallback used by Web browsers. a common fallback used by Web browsers.
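The discovery order above (and the fuller chain implemented by `determineEncoding` later in this diff: BOM, override, transport, meta, parent, likely, chardet, default) is a first-match-wins lookup. A sketch with illustrative parameter names, not html5lib's API:

```python
def pick_encoding(bom=None, override=None, transport=None,
                  meta=None, parent=None, likely=None,
                  chardet_guess=None, default="windows-1252"):
    # First non-None source wins. Sources ahead of <meta> yield a
    # "certain" encoding; the rest are only "tentative" and a later
    # <meta> disagreement may force a reparse.
    candidates = [(bom, "certain"),
                  (override, "certain"),
                  (transport, "certain"),
                  (meta, "tentative"),
                  (parent, "tentative"),
                  (likely, "tentative"),
                  (chardet_guess, "tentative")]
    for encoding, confidence in candidates:
        if encoding is not None:
            return encoding, confidence
    return default, "tentative"
```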
Tokenizers
----------
The part of the parser responsible for translating a raw input stream
into meaningful tokens is the tokenizer. Currently html5lib provides
two.
To set up a tokenizer, simply pass it when instantiating
a :class:`~html5lib.html5parser.HTMLParser`:
.. code-block:: python
import html5lib
from html5lib import sanitizer
p = html5lib.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
p.parse("<p>Surprise!<script>alert('Boo!');</script>")
HTMLTokenizer
~~~~~~~~~~~~~
This is the default tokenizer, the heart of html5lib. The implementation
can be found in `html5lib/tokenizer.py
<https://github.com/html5lib/html5lib-python/blob/master/html5lib/tokenizer.py>`_.
HTMLSanitizer
~~~~~~~~~~~~~
This is a tokenizer that removes unsafe markup and CSS styles from the
input. Elements that are known to be safe are passed through and the
rest is converted to visible text. The default configuration of the
sanitizer follows the `WHATWG Sanitization Rules
<http://wiki.whatwg.org/wiki/Sanitization_rules>`_.
The implementation can be found in `html5lib/sanitizer.py
<https://github.com/html5lib/html5lib-python/blob/master/html5lib/sanitizer.py>`_.

testing/web-platform/tests/tools/third_party/html5lib/flake8-run.sh (vendored executable file, 9 lines)

@ -0,0 +1,9 @@
#!/bin/bash -e
if [[ ! -x $(which flake8) ]]; then
echo "fatal: flake8 not found on $PATH. Exiting."
exit 1
fi
flake8 `dirname $0`
exit $?


@ -0,0 +1,35 @@
"""
HTML parsing library based on the `WHATWG HTML specification
<https://whatwg.org/html>`_. The parser is designed to be compatible with
existing HTML found in the wild and implements well-defined error recovery that
is largely compatible with modern desktop web browsers.
Example usage::
import html5lib
with open("my_document.html", "rb") as f:
tree = html5lib.parse(f)
For convenience, this module re-exports the following names:
* :func:`~.html5parser.parse`
* :func:`~.html5parser.parseFragment`
* :class:`~.html5parser.HTMLParser`
* :func:`~.treebuilders.getTreeBuilder`
* :func:`~.treewalkers.getTreeWalker`
* :func:`~.serializer.serialize`
"""
from __future__ import absolute_import, division, unicode_literals
from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
from .treewalkers import getTreeWalker
from .serializer import serialize
__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
"getTreeWalker", "serialize"]
# this has to be at the top level, see how setup.py parses this
#: Distribution version number.
__version__ = "1.0.1"


@ -175,18 +175,18 @@ def escapeRegexp(string):
return string return string
# output from the above # output from the above
nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0
f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0
b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3
040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2
127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa
# Simpler things # Simpler things
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]") nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
class InfosetFilter(object): class InfosetFilter(object):
replacementRegexp = re.compile(r"U[\dA-F]{5,5}") replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
def __init__(self, replaceChars=None, def __init__(self,
dropXmlnsLocalName=False, dropXmlnsLocalName=False,
dropXmlnsAttrNs=False, dropXmlnsAttrNs=False,
preventDoubleDashComments=False, preventDoubleDashComments=False,
@ -217,7 +217,7 @@ class InfosetFilter(object):
else: else:
return self.toXmlName(name) return self.toXmlName(name)
def coerceElement(self, name, namespace=None): def coerceElement(self, name):
return self.toXmlName(name) return self.toXmlName(name)
def coerceComment(self, data): def coerceComment(self, data):
@ -225,11 +225,14 @@ class InfosetFilter(object):
while "--" in data: while "--" in data:
warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
data = data.replace("--", "- -") data = data.replace("--", "- -")
if data.endswith("-"):
warnings.warn("Comments cannot end in a dash", DataLossWarning)
data += " "
return data return data
def coerceCharacters(self, data): def coerceCharacters(self, data):
if self.replaceFormFeedCharacters: if self.replaceFormFeedCharacters:
for i in range(data.count("\x0C")): for _ in range(data.count("\x0C")):
warnings.warn("Text cannot contain U+000C", DataLossWarning) warnings.warn("Text cannot contain U+000C", DataLossWarning)
data = data.replace("\x0C", " ") data = data.replace("\x0C", " ")
# Other non-xml characters # Other non-xml characters
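The comment coercion added in this hunk can be reproduced standalone. This sketch applies the same two rewrites (break up adjacent dashes, pad a trailing dash) without the `DataLossWarning` machinery:

```python
def coerce_comment(data):
    # XML comments may not contain "--" or end with "-", so rewrite
    # both, mirroring InfosetFilter.coerceComment in this diff.
    while "--" in data:
        data = data.replace("--", "- -")
    if data.endswith("-"):
        data += " "
    return data
```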


@ -1,13 +1,16 @@
from __future__ import absolute_import, division, unicode_literals from __future__ import absolute_import, division, unicode_literals
from six import text_type
from six.moves import http_client from six import text_type, binary_type
from six.moves import http_client, urllib
import codecs import codecs
import re import re
import webencodings
from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
from .constants import encodings, ReparseException from .constants import _ReparseException
from . import utils from . import _utils
from io import StringIO from io import StringIO
@ -16,19 +19,26 @@ try:
except ImportError: except ImportError:
BytesIO = StringIO BytesIO = StringIO
try:
from io import BufferedIOBase
except ImportError:
class BufferedIOBase(object):
pass
# Non-unicode versions of constants for use in the pre-parser # Non-unicode versions of constants for use in the pre-parser
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
invalid_unicode_re = re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]")
invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa
if _utils.supports_lone_surrogates:
# Use one extra step of indirection and create surrogates with
# eval. Not using this indirection would introduce an illegal
# unicode literal on platforms not supporting such lone
# surrogates.
assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] +
eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used
"]")
else:
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
@ -38,7 +48,7 @@ non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
0x10FFFE, 0x10FFFF]) 0x10FFFE, 0x10FFFF])
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]") ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
# Cache for charsUntil() # Cache for charsUntil()
charsUntilRegEx = {} charsUntilRegEx = {}
@ -118,10 +128,13 @@ class BufferedStream(object):
return b"".join(rv) return b"".join(rv)
def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True): def HTMLInputStream(source, **kwargs):
if isinstance(source, http_client.HTTPResponse): # Work around Python bug #20007: read(0) closes the connection.
# Work around Python bug #20007: read(0) closes the connection. # http://bugs.python.org/issue20007
# http://bugs.python.org/issue20007 if (isinstance(source, http_client.HTTPResponse) or
# Also check for addinfourl wrapping HTTPResponse
(isinstance(source, urllib.response.addbase) and
isinstance(source.fp, http_client.HTTPResponse))):
isUnicode = False isUnicode = False
elif hasattr(source, "read"): elif hasattr(source, "read"):
isUnicode = isinstance(source.read(0), text_type) isUnicode = isinstance(source.read(0), text_type)
@ -129,12 +142,13 @@ def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True):
isUnicode = isinstance(source, text_type) isUnicode = isinstance(source, text_type)
if isUnicode: if isUnicode:
if encoding is not None: encodings = [x for x in kwargs if x.endswith("_encoding")]
raise TypeError("Cannot explicitly set an encoding with a unicode string") if encodings:
raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings)
return HTMLUnicodeInputStream(source) return HTMLUnicodeInputStream(source, **kwargs)
else: else:
return HTMLBinaryInputStream(source, encoding, parseMeta, chardet) return HTMLBinaryInputStream(source, **kwargs)
class HTMLUnicodeInputStream(object): class HTMLUnicodeInputStream(object):
@ -160,22 +174,21 @@ class HTMLUnicodeInputStream(object):
regardless of any BOM or later declaration (such as in a meta regardless of any BOM or later declaration (such as in a meta
element) element)
parseMeta - Look for a <meta> element containing encoding information
""" """
# Craziness if not _utils.supports_lone_surrogates:
if len("\U0010FFFF") == 1: # Such platforms will have already checked for such
# surrogate errors, so no need to do this checking.
self.reportCharacterErrors = None
elif len("\U0010FFFF") == 1:
self.reportCharacterErrors = self.characterErrorsUCS4 self.reportCharacterErrors = self.characterErrorsUCS4
self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]")
else: else:
self.reportCharacterErrors = self.characterErrorsUCS2 self.reportCharacterErrors = self.characterErrorsUCS2
self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])")
# List of where new lines occur # List of where new lines occur
self.newLines = [0] self.newLines = [0]
self.charEncoding = ("utf-8", "certain") self.charEncoding = (lookupEncoding("utf-8"), "certain")
self.dataStream = self.openStream(source) self.dataStream = self.openStream(source)
self.reset() self.reset()
@ -265,12 +278,10 @@ class HTMLUnicodeInputStream(object):
self._bufferedCharacter = data[-1] self._bufferedCharacter = data[-1]
data = data[:-1] data = data[:-1]
self.reportCharacterErrors(data) if self.reportCharacterErrors:
self.reportCharacterErrors(data)
# Replace invalid characters # Replace invalid characters
# Note U+0000 is dealt with in the tokenizer
data = self.replaceCharactersRegexp.sub("\ufffd", data)
data = data.replace("\r\n", "\n") data = data.replace("\r\n", "\n")
data = data.replace("\r", "\n") data = data.replace("\r", "\n")
@ -280,7 +291,7 @@ class HTMLUnicodeInputStream(object):
return True return True
def characterErrorsUCS4(self, data): def characterErrorsUCS4(self, data):
for i in range(len(invalid_unicode_re.findall(data))): for _ in range(len(invalid_unicode_re.findall(data))):
self.errors.append("invalid-codepoint") self.errors.append("invalid-codepoint")
def characterErrorsUCS2(self, data): def characterErrorsUCS2(self, data):
@ -293,9 +304,9 @@ class HTMLUnicodeInputStream(object):
codepoint = ord(match.group()) codepoint = ord(match.group())
pos = match.start() pos = match.start()
# Pretty sure there should be endianness issues here # Pretty sure there should be endianness issues here
if utils.isSurrogatePair(data[pos:pos + 2]): if _utils.isSurrogatePair(data[pos:pos + 2]):
# We have a surrogate pair! # We have a surrogate pair!
char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2]) char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2])
if char_val in non_bmp_invalid_codepoints: if char_val in non_bmp_invalid_codepoints:
self.errors.append("invalid-codepoint") self.errors.append("invalid-codepoint")
skip = True skip = True
@ -378,7 +389,9 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
""" """
def __init__(self, source, encoding=None, parseMeta=True, chardet=True): def __init__(self, source, override_encoding=None, transport_encoding=None,
same_origin_parent_encoding=None, likely_encoding=None,
default_encoding="windows-1252", useChardet=True):
"""Initialises the HTMLInputStream. """Initialises the HTMLInputStream.
HTMLInputStream(source, [encoding]) -> Normalized stream from source HTMLInputStream(source, [encoding]) -> Normalized stream from source
@ -391,8 +404,6 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
regardless of any BOM or later declaration (such as in a meta regardless of any BOM or later declaration (such as in a meta
element) element)
parseMeta - Look for a <meta> element containing encoding information
""" """
# Raw Stream - for unicode objects this will encode to utf-8 and set # Raw Stream - for unicode objects this will encode to utf-8 and set
# self.charEncoding as appropriate # self.charEncoding as appropriate
@ -400,27 +411,28 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
HTMLUnicodeInputStream.__init__(self, self.rawStream) HTMLUnicodeInputStream.__init__(self, self.rawStream)
self.charEncoding = (codecName(encoding), "certain")
# Encoding Information # Encoding Information
# Number of bytes to use when looking for a meta element with # Number of bytes to use when looking for a meta element with
# encoding information # encoding information
self.numBytesMeta = 512 self.numBytesMeta = 1024
# Number of bytes to use when using detecting encoding using chardet # Number of bytes to use when using detecting encoding using chardet
self.numBytesChardet = 100 self.numBytesChardet = 100
# Encoding to use if no other information can be found # Things from args
self.defaultEncoding = "windows-1252" self.override_encoding = override_encoding
self.transport_encoding = transport_encoding
self.same_origin_parent_encoding = same_origin_parent_encoding
self.likely_encoding = likely_encoding
self.default_encoding = default_encoding
# Detect encoding iff no explicit "transport level" encoding is supplied # Determine encoding
if (self.charEncoding[0] is None): self.charEncoding = self.determineEncoding(useChardet)
self.charEncoding = self.detectEncoding(parseMeta, chardet) assert self.charEncoding[0] is not None
# Call superclass # Call superclass
self.reset() self.reset()
def reset(self): def reset(self):
self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream, self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace')
'replace')
HTMLUnicodeInputStream.reset(self) HTMLUnicodeInputStream.reset(self)
def openStream(self, source): def openStream(self, source):
@ -437,29 +449,50 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
try: try:
stream.seek(stream.tell()) stream.seek(stream.tell())
except: except: # pylint:disable=bare-except
stream = BufferedStream(stream) stream = BufferedStream(stream)
return stream return stream
def detectEncoding(self, parseMeta=True, chardet=True): def determineEncoding(self, chardet=True):
# First look for a BOM # BOMs take precedence over everything
# This will also read past the BOM if present # This will also read past the BOM if present
encoding = self.detectBOM() charEncoding = self.detectBOM(), "certain"
confidence = "certain" if charEncoding[0] is not None:
# If there is no BOM need to look for meta elements with encoding return charEncoding
# information
if encoding is None and parseMeta: # If we've been overriden, we've been overriden
encoding = self.detectEncodingMeta() charEncoding = lookupEncoding(self.override_encoding), "certain"
confidence = "tentative" if charEncoding[0] is not None:
# Guess with chardet, if avaliable return charEncoding
if encoding is None and chardet:
confidence = "tentative" # Now check the transport layer
charEncoding = lookupEncoding(self.transport_encoding), "certain"
if charEncoding[0] is not None:
return charEncoding
# Look for meta elements with encoding information
charEncoding = self.detectEncodingMeta(), "tentative"
if charEncoding[0] is not None:
return charEncoding
# Parent document encoding
charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative"
if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"):
return charEncoding
# "likely" encoding
charEncoding = lookupEncoding(self.likely_encoding), "tentative"
if charEncoding[0] is not None:
return charEncoding
# Guess with chardet, if available
if chardet:
try: try:
try: from chardet.universaldetector import UniversalDetector
from charade.universaldetector import UniversalDetector except ImportError:
except ImportError: pass
from chardet.universaldetector import UniversalDetector else:
buffers = [] buffers = []
detector = UniversalDetector() detector = UniversalDetector()
while not detector.done: while not detector.done:
@ -470,37 +503,34 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
buffers.append(buffer) buffers.append(buffer)
detector.feed(buffer) detector.feed(buffer)
detector.close() detector.close()
encoding = detector.result['encoding'] encoding = lookupEncoding(detector.result['encoding'])
self.rawStream.seek(0) self.rawStream.seek(0)
except ImportError: if encoding is not None:
pass return encoding, "tentative"
# If all else fails use the default encoding
if encoding is None:
confidence = "tentative"
encoding = self.defaultEncoding
# Substitute for equivalent encodings: # Try the default encoding
encodingSub = {"iso-8859-1": "windows-1252"} charEncoding = lookupEncoding(self.default_encoding), "tentative"
if charEncoding[0] is not None:
return charEncoding
if encoding.lower() in encodingSub: # Fallback to html5lib's default if even that hasn't worked
encoding = encodingSub[encoding.lower()] return lookupEncoding("windows-1252"), "tentative"
return encoding, confidence
    def changeEncoding(self, newEncoding):
        assert self.charEncoding[1] != "certain"
        newEncoding = lookupEncoding(newEncoding)
        if newEncoding is None:
            return
        if newEncoding.name in ("utf-16be", "utf-16le"):
            newEncoding = lookupEncoding("utf-8")
            assert newEncoding is not None
        elif newEncoding == self.charEncoding[0]:
            self.charEncoding = (self.charEncoding[0], "certain")
        else:
            self.rawStream.seek(0)
            self.charEncoding = (newEncoding, "certain")
            self.reset()
            raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
    def detectBOM(self):
        """Attempts to detect a BOM at the start of the stream. If
@@ -508,8 +538,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
        encoding otherwise return None"""
        bomDict = {
            codecs.BOM_UTF8: 'utf-8',
            codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be',
            codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be'
        }

        # Go to beginning of file and read in 4 bytes
@@ -529,9 +559,12 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
        # Set the read position past the BOM if one was found, otherwise
        # set it to the start of the stream
        if encoding:
            self.rawStream.seek(seek)
            return lookupEncoding(encoding)
        else:
            self.rawStream.seek(0)
            return None
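The BOM check above can be restated as a standalone sketch. `sniff_bom` and `BOMS` are illustrative names introduced here, not part of html5lib; the key point preserved from the code is that the 4-byte UTF-32 BOMs must be tried before UTF-16, because `BOM_UTF32_LE` begins with the same two bytes as `BOM_UTF16_LE`.

```python
import codecs

# Longest BOMs first, so a UTF-32 LE BOM is not misread as UTF-16 LE.
BOMS = [
    (codecs.BOM_UTF32_LE, "utf-32le"),
    (codecs.BOM_UTF32_BE, "utf-32be"),
    (codecs.BOM_UTF8, "utf-8"),
    (codecs.BOM_UTF16_LE, "utf-16le"),
    (codecs.BOM_UTF16_BE, "utf-16be"),
]


def sniff_bom(head):
    """Return (encoding label, BOM length) for the first 4 bytes of a stream."""
    for bom, name in BOMS:
        if head.startswith(bom):
            return name, len(bom)
    return None, 0


print(sniff_bom(codecs.BOM_UTF8 + b"<html>"))  # ('utf-8', 3)
```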
    def detectEncodingMeta(self):
        """Report the encoding declared by the meta element
@@ -542,8 +575,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
        self.rawStream.seek(0)
        encoding = parser.getEncoding()

        if encoding is not None and encoding.name in ("utf-16be", "utf-16le"):
            encoding = lookupEncoding("utf-8")

        return encoding
@@ -557,6 +590,7 @@ class EncodingBytes(bytes):
        return bytes.__new__(self, value.lower())

    def __init__(self, value):
        # pylint:disable=unused-argument
        self._position = -1

    def __iter__(self):
@@ -667,7 +701,7 @@ class EncodingParser(object):
                          (b"<!", self.handleOther),
                          (b"<?", self.handleOther),
                          (b"<", self.handlePossibleStartTag))
        for _ in self.data:
            keepParsing = True
            for key, method in methodDispatch:
                if self.data.matchBytes(key):
@@ -706,7 +740,7 @@ class EncodingParser(object):
                    return False
                elif attr[0] == b"charset":
                    tentativeEncoding = attr[1]
                    codec = lookupEncoding(tentativeEncoding)
                    if codec is not None:
                        self.encoding = codec
                        return False
@@ -714,7 +748,7 @@ class EncodingParser(object):
                    contentParser = ContentAttrParser(EncodingBytes(attr[1]))
                    tentativeEncoding = contentParser.parse()
                    if tentativeEncoding is not None:
                        codec = lookupEncoding(tentativeEncoding)
                        if codec is not None:
                            if hasPragma:
                                self.encoding = codec
@@ -871,16 +905,19 @@ class ContentAttrParser(object):
            return None


def lookupEncoding(encoding):
    """Return the python codec name corresponding to an encoding or None if the
    string doesn't correspond to a valid encoding."""
    if isinstance(encoding, binary_type):
        try:
            encoding = encoding.decode("ascii")
        except UnicodeDecodeError:
            return None

    if encoding is not None:
        try:
            return webencodings.lookup(encoding)
        except AttributeError:
            return None
    else:
        return None
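`lookupEncoding` now delegates label resolution to the vendored `webencodings` package. The mechanics can be sketched without that dependency: normalize the label as the WHATWG Encoding standard does (strip ASCII whitespace, lowercase), then map it through a label table. `lookup_label` and `_LABELS` are illustrative names, and the table here is a tiny subset of the full mapping that `webencodings` ships.

```python
# Illustrative subset of the Encoding standard's label-to-encoding table.
# Note that legacy labels like "latin1" map to windows-1252, not ISO-8859-1.
_LABELS = {
    "utf-8": "utf-8",
    "utf8": "utf-8",
    "latin1": "windows-1252",
    "iso-8859-1": "windows-1252",
    "windows-1252": "windows-1252",
}


def lookup_label(label):
    """Resolve an encoding label to a canonical name, or None if unknown."""
    # The spec strips leading/trailing ASCII whitespace and matches
    # labels case-insensitively.
    normalized = label.strip("\t\n\f\r ").lower()
    return _LABELS.get(normalized)


print(lookup_label("  Latin1 "))  # windows-1252
print(lookup_label("bogus"))      # None
```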


@@ -1,9 +1,6 @@
from __future__ import absolute_import, division, unicode_literals

from six import unichr as chr

from collections import deque

@@ -14,9 +11,9 @@ from .constants import digits, hexDigits, EOF
from .constants import tokenTypes, tagTokenTypes
from .constants import replacementCharacters

from ._inputstream import HTMLInputStream

from ._trie import Trie

entitiesTrie = Trie(entities)
@@ -34,16 +31,11 @@ class HTMLTokenizer(object):
    Points to HTMLInputStream object.
    """

    def __init__(self, stream, parser=None, **kwargs):

        self.stream = HTMLInputStream(stream, **kwargs)
        self.parser = parser

        # Setup the initial tokenizer state
        self.escapeFlag = False
        self.lastFourChars = []
@@ -147,8 +139,8 @@ class HTMLTokenizer(object):
            output = "&"

        charStack = [self.stream.char()]
        if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or
                (allowedChar is not None and allowedChar == charStack[0])):
            self.stream.unget(charStack[0])

        elif charStack[0] == "#":
@@ -235,8 +227,7 @@ class HTMLTokenizer(object):
        token = self.currentToken
        # Add token to the queue to be yielded
        if (token["type"] in tagTokenTypes):
            token["name"] = token["name"].translate(asciiUpper2Lower)
            if token["type"] == tokenTypes["EndTag"]:
                if token["data"]:
                    self.tokenQueue.append({"type": tokenTypes["ParseError"],
@@ -921,10 +912,9 @@ class HTMLTokenizer(object):
            # Attributes are not dropped at this stage. That happens when the
            # start tag token is emitted so values can still be safely appended
            # to attributes, but we do want to report the parse error in time.
            self.currentToken["data"][-1][0] = (
                self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
            for name, _ in self.currentToken["data"][:-1]:
                if self.currentToken["data"][-1][0] == name:
                    self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                            "duplicate-attribute"})
@@ -1716,11 +1706,11 @@ class HTMLTokenizer(object):
            else:
                data.append(char)

        data = "".join(data)  # pylint:disable=redefined-variable-type
        # Deal with null here rather than in the parser
        nullCount = data.count("\u0000")
        if nullCount > 0:
            for _ in range(nullCount):
                self.tokenQueue.append({"type": tokenTypes["ParseError"],
                                        "data": "invalid-codepoint"})
            data = data.replace("\u0000", "\uFFFD")
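The null handling in that hunk follows a simple rule: report one `invalid-codepoint` parse error per U+0000 in the run, then substitute each with U+FFFD. A standalone restatement (the function name `scrub_nulls` is invented here for illustration, and a count stands in for the tokenizer's error queue):

```python
def scrub_nulls(data):
    """Replace each U+0000 with U+FFFD; return (cleaned text, error count)."""
    # One parse error would be emitted per null character.
    errors = data.count("\u0000")
    return data.replace("\u0000", "\uFFFD"), errors


cleaned, errors = scrub_nulls("a\u0000b\u0000")
# cleaned == "a\uFFFDb\uFFFD", errors == 2
```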


@@ -4,9 +4,11 @@ from .py import Trie as PyTrie

Trie = PyTrie

# pylint:disable=wrong-import-position
try:
    from .datrie import Trie as DATrie
except ImportError:
    pass
else:
    Trie = DATrie
# pylint:enable=wrong-import-position


@@ -7,13 +7,13 @@ class Trie(Mapping):
    """Abstract base class for tries"""

    def keys(self, prefix=None):
        # pylint:disable=arguments-differ
        keys = super(Trie, self).keys()

        if prefix is None:
            return set(keys)

        return {x for x in keys if x.startswith(prefix)}

    def has_keys_with_prefix(self, prefix):
        for key in self.keys():


@@ -2,6 +2,8 @@ from __future__ import absolute_import, division, unicode_literals

from types import ModuleType

from six import text_type

try:
    import xml.etree.cElementTree as default_etree
except ImportError:

@@ -9,7 +11,26 @@ except ImportError:

__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
           "surrogatePairToCodepoint", "moduleFactoryFactory",
           "supports_lone_surrogates"]


# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be
# caught by the below test. In general this would be any platform
# using UTF-16 as its encoding of unicode strings, such as
# Jython. This is because UTF-16 itself is based on the use of such
# surrogates, and there is no mechanism to further escape such
# escapes.
try:
    _x = eval('"\\uD800"')  # pylint:disable=eval-used
    if not isinstance(_x, text_type):
        # We need this with u"" because of http://bugs.jython.org/issue2039
        _x = eval('u"\\uD800"')  # pylint:disable=eval-used
        assert isinstance(_x, text_type)
except:  # pylint:disable=bare-except
    supports_lone_surrogates = False
else:
    supports_lone_surrogates = True
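The probe above works because on a platform that cannot hold lone surrogates, evaluating the escape either fails or yields something other than a one-character text string. A simplified Python 3 restatement (`_probe` is an illustrative name; the original also handles the Python 2 `u""` case, omitted here):

```python
# On CPython 3 a lone high surrogate is a valid one-character str, so
# this probe sets supports_lone_surrogates to True; on UTF-16-backed
# runtimes such as Jython it would fail or produce a surrogate pair.
try:
    _probe = eval('"\\uD800"')  # lone high surrogate
    supports_lone_surrogates = isinstance(_probe, str) and len(_probe) == 1
except Exception:
    supports_lone_surrogates = False
```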
class MethodDispatcher(dict):

@@ -31,19 +52,20 @@ class MethodDispatcher(dict):
        # anything here.
        _dictEntries = []
        for name, value in items:
            if isinstance(name, (list, tuple, frozenset, set)):
                for item in name:
                    _dictEntries.append((item, value))
            else:
                _dictEntries.append((name, value))
        dict.__init__(self, _dictEntries)
        assert len(self) == len(_dictEntries)
        self.default = None

    def __getitem__(self, key):
        return dict.get(self, key, self.default)


# Some utility functions to deal with weirdness around UCS2 vs UCS4
# python builds

def isSurrogatePair(data):
@@ -70,13 +92,33 @@ def moduleFactoryFactory(factory):
        else:
            name = b"_%s_factory" % baseModule.__name__

        kwargs_tuple = tuple(kwargs.items())

        try:
            return moduleCache[name][args][kwargs_tuple]
        except KeyError:
            mod = ModuleType(name)
            objs = factory(baseModule, *args, **kwargs)
            mod.__dict__.update(objs)
            if "name" not in moduleCache:
                moduleCache[name] = {}
            if "args" not in moduleCache[name]:
                moduleCache[name][args] = {}
            if "kwargs" not in moduleCache[name][args]:
                moduleCache[name][args][kwargs_tuple] = {}
            moduleCache[name][args][kwargs_tuple] = mod
            return mod

    return moduleFactory
def memoize(func):
    cache = {}

    def wrapped(*args, **kwargs):
        key = (tuple(args), tuple(kwargs.items()))
        if key not in cache:
            cache[key] = func(*args, **kwargs)
        return cache[key]

    return wrapped
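The new `memoize` helper caches results keyed on positional and keyword arguments. A usage sketch (the decorator is restated here so the snippet runs standalone; `add` and `calls` are illustrative names):

```python
def memoize(func):
    # Same shape as the helper above: one cache entry per distinct
    # (args, kwargs) combination.
    cache = {}

    def wrapped(*args, **kwargs):
        key = (tuple(args), tuple(kwargs.items()))
        if key not in cache:
            cache[key] = func(*args, **kwargs)
        return cache[key]

    return wrapped


calls = []


@memoize
def add(a, b):
    calls.append((a, b))  # record each real invocation
    return a + b


assert add(1, 2) == 3
assert add(1, 2) == 3   # second call served from the cache
assert len(calls) == 1  # the wrapped function ran only once
```

Note the key uses `tuple(kwargs.items())`, so it relies on keyword arguments hashing consistently; unhashable arguments would raise `TypeError`.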


@@ -1,292 +1,296 @@
from __future__ import absolute_import, division, unicode_literals

import string

EOF = None

E = {
    "null-character":
        "Null character in input stream, replaced with U+FFFD.",
    "invalid-codepoint":
        "Invalid codepoint in stream.",
    "incorrectly-placed-solidus":
        "Solidus (/) incorrectly placed in tag.",
    "incorrect-cr-newline-entity":
        "Incorrect CR newline entity, replaced with LF.",
    "illegal-windows-1252-entity":
        "Entity used with illegal number (windows-1252 reference).",
    "cant-convert-numeric-entity":
        "Numeric entity couldn't be converted to character "
        "(codepoint U+%(charAsInt)08x).",
    "illegal-codepoint-for-numeric-entity":
        "Numeric entity represents an illegal codepoint: "
        "U+%(charAsInt)08x.",
    "numeric-entity-without-semicolon":
        "Numeric entity didn't end with ';'.",
    "expected-numeric-entity-but-got-eof":
        "Numeric entity expected. Got end of file instead.",
    "expected-numeric-entity":
        "Numeric entity expected but none found.",
    "named-entity-without-semicolon":
        "Named entity didn't end with ';'.",
    "expected-named-entity":
        "Named entity expected. Got none.",
    "attributes-in-end-tag":
        "End tag contains unexpected attributes.",
    'self-closing-flag-on-end-tag':
        "End tag contains unexpected self-closing flag.",
    "expected-tag-name-but-got-right-bracket":
        "Expected tag name. Got '>' instead.",
    "expected-tag-name-but-got-question-mark":
        "Expected tag name. Got '?' instead. (HTML doesn't "
        "support processing instructions.)",
    "expected-tag-name":
        "Expected tag name. Got something else instead",
    "expected-closing-tag-but-got-right-bracket":
        "Expected closing tag. Got '>' instead. Ignoring '</>'.",
    "expected-closing-tag-but-got-eof":
        "Expected closing tag. Unexpected end of file.",
    "expected-closing-tag-but-got-char":
        "Expected closing tag. Unexpected character '%(data)s' found.",
    "eof-in-tag-name":
        "Unexpected end of file in the tag name.",
    "expected-attribute-name-but-got-eof":
        "Unexpected end of file. Expected attribute name instead.",
    "eof-in-attribute-name":
        "Unexpected end of file in attribute name.",
    "invalid-character-in-attribute-name":
        "Invalid character in attribute name",
    "duplicate-attribute":
        "Dropped duplicate attribute on tag.",
    "expected-end-of-tag-name-but-got-eof":
        "Unexpected end of file. Expected = or end of tag.",
    "expected-attribute-value-but-got-eof":
        "Unexpected end of file. Expected attribute value.",
    "expected-attribute-value-but-got-right-bracket":
        "Expected attribute value. Got '>' instead.",
    'equals-in-unquoted-attribute-value':
        "Unexpected = in unquoted attribute",
    'unexpected-character-in-unquoted-attribute-value':
        "Unexpected character in unquoted attribute",
    "invalid-character-after-attribute-name":
        "Unexpected character after attribute name.",
    "unexpected-character-after-attribute-value":
        "Unexpected character after attribute value.",
    "eof-in-attribute-value-double-quote":
        "Unexpected end of file in attribute value (\").",
    "eof-in-attribute-value-single-quote":
        "Unexpected end of file in attribute value (').",
    "eof-in-attribute-value-no-quotes":
        "Unexpected end of file in attribute value.",
    "unexpected-EOF-after-solidus-in-tag":
        "Unexpected end of file in tag. Expected >",
    "unexpected-character-after-solidus-in-tag":
        "Unexpected character after / in tag. Expected >",
    "expected-dashes-or-doctype":
        "Expected '--' or 'DOCTYPE'. Not found.",
    "unexpected-bang-after-double-dash-in-comment":
        "Unexpected ! after -- in comment",
    "unexpected-space-after-double-dash-in-comment":
        "Unexpected space after -- in comment",
    "incorrect-comment":
        "Incorrect comment.",
    "eof-in-comment":
        "Unexpected end of file in comment.",
    "eof-in-comment-end-dash":
        "Unexpected end of file in comment (-)",
    "unexpected-dash-after-double-dash-in-comment":
        "Unexpected '-' after '--' found in comment.",
    "eof-in-comment-double-dash":
        "Unexpected end of file in comment (--).",
    "eof-in-comment-end-space-state":
        "Unexpected end of file in comment.",
    "eof-in-comment-end-bang-state":
        "Unexpected end of file in comment.",
    "unexpected-char-in-comment":
        "Unexpected character in comment found.",
    "need-space-after-doctype":
        "No space after literal string 'DOCTYPE'.",
    "expected-doctype-name-but-got-right-bracket":
        "Unexpected > character. Expected DOCTYPE name.",
    "expected-doctype-name-but-got-eof":
        "Unexpected end of file. Expected DOCTYPE name.",
    "eof-in-doctype-name":
        "Unexpected end of file in DOCTYPE name.",
    "eof-in-doctype":
        "Unexpected end of file in DOCTYPE.",
    "expected-space-or-right-bracket-in-doctype":
        "Expected space or '>'. Got '%(data)s'",
    "unexpected-end-of-doctype":
        "Unexpected end of DOCTYPE.",
    "unexpected-char-in-doctype":
        "Unexpected character in DOCTYPE.",
    "eof-in-innerhtml":
        "XXX innerHTML EOF",
    "unexpected-doctype":
        "Unexpected DOCTYPE. Ignored.",
    "non-html-root":
        "html needs to be the first start tag.",
    "expected-doctype-but-got-eof":
        "Unexpected End of file. Expected DOCTYPE.",
    "unknown-doctype":
        "Erroneous DOCTYPE.",
    "expected-doctype-but-got-chars":
        "Unexpected non-space characters. Expected DOCTYPE.",
    "expected-doctype-but-got-start-tag":
        "Unexpected start tag (%(name)s). Expected DOCTYPE.",
    "expected-doctype-but-got-end-tag":
        "Unexpected end tag (%(name)s). Expected DOCTYPE.",
    "end-tag-after-implied-root":
        "Unexpected end tag (%(name)s) after the (implied) root element.",
    "expected-named-closing-tag-but-got-eof":
        "Unexpected end of file. Expected end tag (%(name)s).",
    "two-heads-are-not-better-than-one":
        "Unexpected start tag head in existing head. Ignored.",
    "unexpected-end-tag":
        "Unexpected end tag (%(name)s). Ignored.",
    "unexpected-start-tag-out-of-my-head":
        "Unexpected start tag (%(name)s) that can be in head. Moved.",
    "unexpected-start-tag":
        "Unexpected start tag (%(name)s).",
    "missing-end-tag":
        "Missing end tag (%(name)s).",
    "missing-end-tags":
        "Missing end tags (%(name)s).",
    "unexpected-start-tag-implies-end-tag":
        "Unexpected start tag (%(startName)s) "
        "implies end tag (%(endName)s).",
    "unexpected-start-tag-treated-as":
        "Unexpected start tag (%(originalName)s). Treated as %(newName)s.",
    "deprecated-tag":
        "Unexpected start tag %(name)s. Don't use it!",
    "unexpected-start-tag-ignored":
        "Unexpected start tag %(name)s. Ignored.",
    "expected-one-end-tag-but-got-another":
        "Unexpected end tag (%(gotName)s). "
        "Missing end tag (%(expectedName)s).",
    "end-tag-too-early":
        "End tag (%(name)s) seen too early. Expected other end tag.",
    "end-tag-too-early-named":
        "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).",
    "end-tag-too-early-ignored":
        "End tag (%(name)s) seen too early. Ignored.",
    "adoption-agency-1.1":
        "End tag (%(name)s) violates step 1, "
        "paragraph 1 of the adoption agency algorithm.",
    "adoption-agency-1.2":
        "End tag (%(name)s) violates step 1, "
        "paragraph 2 of the adoption agency algorithm.",
    "adoption-agency-1.3":
        "End tag (%(name)s) violates step 1, "
        "paragraph 3 of the adoption agency algorithm.",
    "adoption-agency-4.4":
        "End tag (%(name)s) violates step 4, "
        "paragraph 4 of the adoption agency algorithm.",
    "unexpected-end-tag-treated-as":
        "Unexpected end tag (%(originalName)s). Treated as %(newName)s.",
    "no-end-tag":
        "This element (%(name)s) has no end tag.",
    "unexpected-implied-end-tag-in-table":
        "Unexpected implied end tag (%(name)s) in the table phase.",
    "unexpected-implied-end-tag-in-table-body":
        "Unexpected implied end tag (%(name)s) in the table body phase.",
    "unexpected-char-implies-table-voodoo":
        "Unexpected non-space characters in "
        "table context caused voodoo mode.",
    "unexpected-hidden-input-in-table":
        "Unexpected input with type hidden in table context.",
    "unexpected-form-in-table":
        "Unexpected form in table context.",
    "unexpected-start-tag-implies-table-voodoo":
        "Unexpected start tag (%(name)s) in "
        "table context caused voodoo mode.",
    "unexpected-end-tag-implies-table-voodoo":
        "Unexpected end tag (%(name)s) in "
        "table context caused voodoo mode.",
    "unexpected-cell-in-table-body":
        "Unexpected table cell start tag (%(name)s) "
        "in the table body phase.",
    "unexpected-cell-end-tag":
        "Got table cell end tag (%(name)s) "
        "while required end tags are missing.",
    "unexpected-end-tag-in-table-body":
        "Unexpected end tag (%(name)s) in the table body phase. Ignored.",
    "unexpected-implied-end-tag-in-table-row":
        "Unexpected implied end tag (%(name)s) in the table row phase.",
    "unexpected-end-tag-in-table-row":
        "Unexpected end tag (%(name)s) in the table row phase. Ignored.",
    "unexpected-select-in-select":
        "Unexpected select start tag in the select phase "
        "treated as select end tag.",
    "unexpected-input-in-select":
        "Unexpected input start tag in the select phase.",
    "unexpected-start-tag-in-select":
        "Unexpected start tag token (%(name)s in the select phase. "
        "Ignored.",
    "unexpected-end-tag-in-select":
        "Unexpected end tag (%(name)s) in the select phase. Ignored.",
    "unexpected-table-element-start-tag-in-select-in-table":
        "Unexpected table element start tag (%(name)s) in the select in table phase.",
    "unexpected-table-element-end-tag-in-select-in-table":
        "Unexpected table element end tag (%(name)s) in the select in table phase.",
    "unexpected-char-after-body":
        "Unexpected non-space characters in the after body phase.",
    "unexpected-start-tag-after-body":
        "Unexpected start tag token (%(name)s)"
        " in the after body phase.",
    "unexpected-end-tag-after-body":
        "Unexpected end tag token (%(name)s)"
        " in the after body phase.",
    "unexpected-char-in-frameset":
        "Unexpected characters in the frameset phase. Characters ignored.",
    "unexpected-start-tag-in-frameset":
        "Unexpected start tag token (%(name)s)"
        " in the frameset phase. Ignored.",
    "unexpected-frameset-in-frameset-innerhtml":
        "Unexpected end tag token (frameset) "
        "in the frameset phase (innerHTML).",
    "unexpected-end-tag-in-frameset":
        "Unexpected end tag token (%(name)s)"
        " in the frameset phase. Ignored.",
    "unexpected-char-after-frameset":
        "Unexpected non-space characters in the "
        "after frameset phase. Ignored.",
    "unexpected-start-tag-after-frameset":
        "Unexpected start tag (%(name)s)"
        " in the after frameset phase. Ignored.",
    "unexpected-end-tag-after-frameset":
        "Unexpected end tag (%(name)s)"
        " in the after frameset phase. Ignored.",
    "unexpected-end-tag-after-body-innerhtml":
_("Unexpected end tag after body(innerHtml)"), "Unexpected end tag after body(innerHtml)",
"expected-eof-but-got-char": "expected-eof-but-got-char":
_("Unexpected non-space characters. Expected end of file."), "Unexpected non-space characters. Expected end of file.",
"expected-eof-but-got-start-tag": "expected-eof-but-got-start-tag":
_("Unexpected start tag (%(name)s)" "Unexpected start tag (%(name)s)"
". Expected end of file."), ". Expected end of file.",
"expected-eof-but-got-end-tag": "expected-eof-but-got-end-tag":
_("Unexpected end tag (%(name)s)" "Unexpected end tag (%(name)s)"
". Expected end of file."), ". Expected end of file.",
"eof-in-table": "eof-in-table":
_("Unexpected end of file. Expected table content."), "Unexpected end of file. Expected table content.",
"eof-in-select": "eof-in-select":
_("Unexpected end of file. Expected select content."), "Unexpected end of file. Expected select content.",
"eof-in-frameset": "eof-in-frameset":
_("Unexpected end of file. Expected frameset content."), "Unexpected end of file. Expected frameset content.",
"eof-in-script-in-script": "eof-in-script-in-script":
_("Unexpected end of file. Expected script content."), "Unexpected end of file. Expected script content.",
"eof-in-foreign-lands": "eof-in-foreign-lands":
_("Unexpected end of file. Expected foreign content"), "Unexpected end of file. Expected foreign content",
"non-void-element-with-trailing-solidus": "non-void-element-with-trailing-solidus":
_("Trailing solidus not allowed on element %(name)s"), "Trailing solidus not allowed on element %(name)s",
"unexpected-html-element-in-foreign-content": "unexpected-html-element-in-foreign-content":
_("Element %(name)s not allowed in a non-html context"), "Element %(name)s not allowed in a non-html context",
"unexpected-end-tag-before-html": "unexpected-end-tag-before-html":
_("Unexpected end tag (%(name)s) before html."), "Unexpected end tag (%(name)s) before html.",
"unexpected-inhead-noscript-tag":
"Element %(name)s not allowed in a inhead-noscript context",
"eof-in-head-noscript":
"Unexpected end of file. Expected inhead-noscript content",
"char-in-head-noscript":
"Unexpected non-space character. Expected inhead-noscript content",
"XXX-undefined-error": "XXX-undefined-error":
_("Undefined error (this sucks and should be fixed)"), "Undefined error (this sucks and should be fixed)",
} }
namespaces = { namespaces = {
@ -298,7 +302,7 @@ namespaces = {
"xmlns": "http://www.w3.org/2000/xmlns/" "xmlns": "http://www.w3.org/2000/xmlns/"
} }
scopingElements = frozenset(( scopingElements = frozenset([
(namespaces["html"], "applet"), (namespaces["html"], "applet"),
(namespaces["html"], "caption"), (namespaces["html"], "caption"),
(namespaces["html"], "html"), (namespaces["html"], "html"),
@ -316,9 +320,9 @@ scopingElements = frozenset((
(namespaces["svg"], "foreignObject"), (namespaces["svg"], "foreignObject"),
(namespaces["svg"], "desc"), (namespaces["svg"], "desc"),
(namespaces["svg"], "title"), (namespaces["svg"], "title"),
)) ])
formattingElements = frozenset(( formattingElements = frozenset([
(namespaces["html"], "a"), (namespaces["html"], "a"),
(namespaces["html"], "b"), (namespaces["html"], "b"),
(namespaces["html"], "big"), (namespaces["html"], "big"),
@ -333,9 +337,9 @@ formattingElements = frozenset((
(namespaces["html"], "strong"), (namespaces["html"], "strong"),
(namespaces["html"], "tt"), (namespaces["html"], "tt"),
(namespaces["html"], "u") (namespaces["html"], "u")
)) ])
specialElements = frozenset(( specialElements = frozenset([
(namespaces["html"], "address"), (namespaces["html"], "address"),
(namespaces["html"], "applet"), (namespaces["html"], "applet"),
(namespaces["html"], "area"), (namespaces["html"], "area"),
@ -416,22 +420,89 @@ specialElements = frozenset((
(namespaces["html"], "wbr"), (namespaces["html"], "wbr"),
(namespaces["html"], "xmp"), (namespaces["html"], "xmp"),
(namespaces["svg"], "foreignObject") (namespaces["svg"], "foreignObject")
)) ])
htmlIntegrationPointElements = frozenset(( htmlIntegrationPointElements = frozenset([
(namespaces["mathml"], "annotaion-xml"), (namespaces["mathml"], "annotation-xml"),
(namespaces["svg"], "foreignObject"), (namespaces["svg"], "foreignObject"),
(namespaces["svg"], "desc"), (namespaces["svg"], "desc"),
(namespaces["svg"], "title") (namespaces["svg"], "title")
)) ])
mathmlTextIntegrationPointElements = frozenset(( mathmlTextIntegrationPointElements = frozenset([
(namespaces["mathml"], "mi"), (namespaces["mathml"], "mi"),
(namespaces["mathml"], "mo"), (namespaces["mathml"], "mo"),
(namespaces["mathml"], "mn"), (namespaces["mathml"], "mn"),
(namespaces["mathml"], "ms"), (namespaces["mathml"], "ms"),
(namespaces["mathml"], "mtext") (namespaces["mathml"], "mtext")
)) ])
adjustSVGAttributes = {
"attributename": "attributeName",
"attributetype": "attributeType",
"basefrequency": "baseFrequency",
"baseprofile": "baseProfile",
"calcmode": "calcMode",
"clippathunits": "clipPathUnits",
"contentscripttype": "contentScriptType",
"contentstyletype": "contentStyleType",
"diffuseconstant": "diffuseConstant",
"edgemode": "edgeMode",
"externalresourcesrequired": "externalResourcesRequired",
"filterres": "filterRes",
"filterunits": "filterUnits",
"glyphref": "glyphRef",
"gradienttransform": "gradientTransform",
"gradientunits": "gradientUnits",
"kernelmatrix": "kernelMatrix",
"kernelunitlength": "kernelUnitLength",
"keypoints": "keyPoints",
"keysplines": "keySplines",
"keytimes": "keyTimes",
"lengthadjust": "lengthAdjust",
"limitingconeangle": "limitingConeAngle",
"markerheight": "markerHeight",
"markerunits": "markerUnits",
"markerwidth": "markerWidth",
"maskcontentunits": "maskContentUnits",
"maskunits": "maskUnits",
"numoctaves": "numOctaves",
"pathlength": "pathLength",
"patterncontentunits": "patternContentUnits",
"patterntransform": "patternTransform",
"patternunits": "patternUnits",
"pointsatx": "pointsAtX",
"pointsaty": "pointsAtY",
"pointsatz": "pointsAtZ",
"preservealpha": "preserveAlpha",
"preserveaspectratio": "preserveAspectRatio",
"primitiveunits": "primitiveUnits",
"refx": "refX",
"refy": "refY",
"repeatcount": "repeatCount",
"repeatdur": "repeatDur",
"requiredextensions": "requiredExtensions",
"requiredfeatures": "requiredFeatures",
"specularconstant": "specularConstant",
"specularexponent": "specularExponent",
"spreadmethod": "spreadMethod",
"startoffset": "startOffset",
"stddeviation": "stdDeviation",
"stitchtiles": "stitchTiles",
"surfacescale": "surfaceScale",
"systemlanguage": "systemLanguage",
"tablevalues": "tableValues",
"targetx": "targetX",
"targety": "targetY",
"textlength": "textLength",
"viewbox": "viewBox",
"viewtarget": "viewTarget",
"xchannelselector": "xChannelSelector",
"ychannelselector": "yChannelSelector",
"zoomandpan": "zoomAndPan"
}
adjustMathMLAttributes = {"definitionurl": "definitionURL"}
adjustForeignAttributes = {
"xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
@@ -451,21 +522,21 @@ adjustForeignAttributes = {
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
adjustForeignAttributes.items()])
spaceCharacters = frozenset([
"\t",
"\n",
"\u000C",
" ",
"\r"
])
tableInsertModeElements = frozenset([
"table",
"tbody",
"tfoot",
"thead",
"tr"
])
asciiLowercase = frozenset(string.ascii_lowercase)
asciiUppercase = frozenset(string.ascii_uppercase)
@@ -486,7 +557,7 @@ headingElements = (
"h6"
)
voidElements = frozenset([
"base",
"command",
"event-source",
@@ -502,11 +573,11 @@ voidElements = frozenset((
"input",
"source",
"track"
])
cdataElements = frozenset(['title', 'textarea'])
rcdataElements = frozenset([
'style',
'script',
'xmp',
@@ -514,27 +585,28 @@ rcdataElements = frozenset((
'noembed',
'noframes',
'noscript'
])
booleanAttributes = {
"": frozenset(["irrelevant", "itemscope"]),
"style": frozenset(["scoped"]),
"img": frozenset(["ismap"]),
"audio": frozenset(["autoplay", "controls"]),
"video": frozenset(["autoplay", "controls"]),
"script": frozenset(["defer", "async"]),
"details": frozenset(["open"]),
"datagrid": frozenset(["multiple", "disabled"]),
"command": frozenset(["hidden", "disabled", "checked", "default"]),
"hr": frozenset(["noshade"]),
"menu": frozenset(["autosubmit"]),
"fieldset": frozenset(["disabled", "readonly"]),
"option": frozenset(["disabled", "readonly", "selected"]),
"optgroup": frozenset(["disabled", "readonly"]),
"button": frozenset(["disabled", "autofocus"]),
"input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]),
"select": frozenset(["disabled", "readonly", "autofocus", "multiple"]),
"output": frozenset(["disabled", "readonly"]),
"iframe": frozenset(["seamless"]),
}
# entitiesWindows1252 has to be _ordered_ and needs to have an index. It
@@ -574,7 +646,7 @@ entitiesWindows1252 = (
376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
)
xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;'])
entities = {
"AElig": "\xc6",
@@ -2815,7 +2887,6 @@ replacementCharacters = {
0x0d: "\u000D",
0x80: "\u20AC",
0x81: "\u0081",
0x82: "\u201A",
0x83: "\u0192",
0x84: "\u201E",
@@ -2848,235 +2919,6 @@ replacementCharacters = {
0x9F: "\u0178",
}
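The `replacementCharacters` table maps the windows-1252 range of ill-formed numeric character references to their intended code points. A sketch of a lookup against an excerpt of the table (`replace_charref` is a hypothetical helper, not the tokenizer's actual code path):

```python
# Excerpt of the replacementCharacters table from constants.py.
replacementCharacters = {
    0x80: "\u20AC",  # numeric reference &#x80; really means the euro sign
    0x82: "\u201A",
}

def replace_charref(codepoint):
    # C1-range code points get their windows-1252 replacement;
    # everything else decodes as the code point itself.
    return replacementCharacters.get(codepoint, chr(codepoint))

assert replace_charref(0x80) == "\u20AC"
assert replace_charref(0x41) == "A"
```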
encodings = {
'437': 'cp437',
'850': 'cp850',
'852': 'cp852',
'855': 'cp855',
'857': 'cp857',
'860': 'cp860',
'861': 'cp861',
'862': 'cp862',
'863': 'cp863',
'865': 'cp865',
'866': 'cp866',
'869': 'cp869',
'ansix341968': 'ascii',
'ansix341986': 'ascii',
'arabic': 'iso8859-6',
'ascii': 'ascii',
'asmo708': 'iso8859-6',
'big5': 'big5',
'big5hkscs': 'big5hkscs',
'chinese': 'gbk',
'cp037': 'cp037',
'cp1026': 'cp1026',
'cp154': 'ptcp154',
'cp367': 'ascii',
'cp424': 'cp424',
'cp437': 'cp437',
'cp500': 'cp500',
'cp775': 'cp775',
'cp819': 'windows-1252',
'cp850': 'cp850',
'cp852': 'cp852',
'cp855': 'cp855',
'cp857': 'cp857',
'cp860': 'cp860',
'cp861': 'cp861',
'cp862': 'cp862',
'cp863': 'cp863',
'cp864': 'cp864',
'cp865': 'cp865',
'cp866': 'cp866',
'cp869': 'cp869',
'cp936': 'gbk',
'cpgr': 'cp869',
'cpis': 'cp861',
'csascii': 'ascii',
'csbig5': 'big5',
'cseuckr': 'cp949',
'cseucpkdfmtjapanese': 'euc_jp',
'csgb2312': 'gbk',
'cshproman8': 'hp-roman8',
'csibm037': 'cp037',
'csibm1026': 'cp1026',
'csibm424': 'cp424',
'csibm500': 'cp500',
'csibm855': 'cp855',
'csibm857': 'cp857',
'csibm860': 'cp860',
'csibm861': 'cp861',
'csibm863': 'cp863',
'csibm864': 'cp864',
'csibm865': 'cp865',
'csibm866': 'cp866',
'csibm869': 'cp869',
'csiso2022jp': 'iso2022_jp',
'csiso2022jp2': 'iso2022_jp_2',
'csiso2022kr': 'iso2022_kr',
'csiso58gb231280': 'gbk',
'csisolatin1': 'windows-1252',
'csisolatin2': 'iso8859-2',
'csisolatin3': 'iso8859-3',
'csisolatin4': 'iso8859-4',
'csisolatin5': 'windows-1254',
'csisolatin6': 'iso8859-10',
'csisolatinarabic': 'iso8859-6',
'csisolatincyrillic': 'iso8859-5',
'csisolatingreek': 'iso8859-7',
'csisolatinhebrew': 'iso8859-8',
'cskoi8r': 'koi8-r',
'csksc56011987': 'cp949',
'cspc775baltic': 'cp775',
'cspc850multilingual': 'cp850',
'cspc862latinhebrew': 'cp862',
'cspc8codepage437': 'cp437',
'cspcp852': 'cp852',
'csptcp154': 'ptcp154',
'csshiftjis': 'shift_jis',
'csunicode11utf7': 'utf-7',
'cyrillic': 'iso8859-5',
'cyrillicasian': 'ptcp154',
'ebcdiccpbe': 'cp500',
'ebcdiccpca': 'cp037',
'ebcdiccpch': 'cp500',
'ebcdiccphe': 'cp424',
'ebcdiccpnl': 'cp037',
'ebcdiccpus': 'cp037',
'ebcdiccpwt': 'cp037',
'ecma114': 'iso8859-6',
'ecma118': 'iso8859-7',
'elot928': 'iso8859-7',
'eucjp': 'euc_jp',
'euckr': 'cp949',
'extendedunixcodepackedformatforjapanese': 'euc_jp',
'gb18030': 'gb18030',
'gb2312': 'gbk',
'gb231280': 'gbk',
'gbk': 'gbk',
'greek': 'iso8859-7',
'greek8': 'iso8859-7',
'hebrew': 'iso8859-8',
'hproman8': 'hp-roman8',
'hzgb2312': 'hz',
'ibm037': 'cp037',
'ibm1026': 'cp1026',
'ibm367': 'ascii',
'ibm424': 'cp424',
'ibm437': 'cp437',
'ibm500': 'cp500',
'ibm775': 'cp775',
'ibm819': 'windows-1252',
'ibm850': 'cp850',
'ibm852': 'cp852',
'ibm855': 'cp855',
'ibm857': 'cp857',
'ibm860': 'cp860',
'ibm861': 'cp861',
'ibm862': 'cp862',
'ibm863': 'cp863',
'ibm864': 'cp864',
'ibm865': 'cp865',
'ibm866': 'cp866',
'ibm869': 'cp869',
'iso2022jp': 'iso2022_jp',
'iso2022jp2': 'iso2022_jp_2',
'iso2022kr': 'iso2022_kr',
'iso646irv1991': 'ascii',
'iso646us': 'ascii',
'iso88591': 'windows-1252',
'iso885910': 'iso8859-10',
'iso8859101992': 'iso8859-10',
'iso885911987': 'windows-1252',
'iso885913': 'iso8859-13',
'iso885914': 'iso8859-14',
'iso8859141998': 'iso8859-14',
'iso885915': 'iso8859-15',
'iso885916': 'iso8859-16',
'iso8859162001': 'iso8859-16',
'iso88592': 'iso8859-2',
'iso885921987': 'iso8859-2',
'iso88593': 'iso8859-3',
'iso885931988': 'iso8859-3',
'iso88594': 'iso8859-4',
'iso885941988': 'iso8859-4',
'iso88595': 'iso8859-5',
'iso885951988': 'iso8859-5',
'iso88596': 'iso8859-6',
'iso885961987': 'iso8859-6',
'iso88597': 'iso8859-7',
'iso885971987': 'iso8859-7',
'iso88598': 'iso8859-8',
'iso885981988': 'iso8859-8',
'iso88599': 'windows-1254',
'iso885991989': 'windows-1254',
'isoceltic': 'iso8859-14',
'isoir100': 'windows-1252',
'isoir101': 'iso8859-2',
'isoir109': 'iso8859-3',
'isoir110': 'iso8859-4',
'isoir126': 'iso8859-7',
'isoir127': 'iso8859-6',
'isoir138': 'iso8859-8',
'isoir144': 'iso8859-5',
'isoir148': 'windows-1254',
'isoir149': 'cp949',
'isoir157': 'iso8859-10',
'isoir199': 'iso8859-14',
'isoir226': 'iso8859-16',
'isoir58': 'gbk',
'isoir6': 'ascii',
'koi8r': 'koi8-r',
'koi8u': 'koi8-u',
'korean': 'cp949',
'ksc5601': 'cp949',
'ksc56011987': 'cp949',
'ksc56011989': 'cp949',
'l1': 'windows-1252',
'l10': 'iso8859-16',
'l2': 'iso8859-2',
'l3': 'iso8859-3',
'l4': 'iso8859-4',
'l5': 'windows-1254',
'l6': 'iso8859-10',
'l8': 'iso8859-14',
'latin1': 'windows-1252',
'latin10': 'iso8859-16',
'latin2': 'iso8859-2',
'latin3': 'iso8859-3',
'latin4': 'iso8859-4',
'latin5': 'windows-1254',
'latin6': 'iso8859-10',
'latin8': 'iso8859-14',
'latin9': 'iso8859-15',
'ms936': 'gbk',
'mskanji': 'shift_jis',
'pt154': 'ptcp154',
'ptcp154': 'ptcp154',
'r8': 'hp-roman8',
'roman8': 'hp-roman8',
'shiftjis': 'shift_jis',
'tis620': 'cp874',
'unicode11utf7': 'utf-7',
'us': 'ascii',
'usascii': 'ascii',
'utf16': 'utf-16',
'utf16be': 'utf-16-be',
'utf16le': 'utf-16-le',
'utf8': 'utf-8',
'windows1250': 'cp1250',
'windows1251': 'cp1251',
'windows1252': 'cp1252',
'windows1253': 'cp1253',
'windows1254': 'cp1254',
'windows1255': 'cp1255',
'windows1256': 'cp1256',
'windows1257': 'cp1257',
'windows1258': 'cp1258',
'windows936': 'gbk',
'x-x-big5': 'big5'}
tokenTypes = {
"Doctype": 0,
"Characters": 1,
@@ -3088,8 +2930,8 @@ tokenTypes = {
"ParseError": 7
}
tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]])
prefixes = dict([(v, k) for k, v in namespaces.items()])
@@ -3097,8 +2939,9 @@ prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
class DataLossWarning(UserWarning):
"""Raised when the current tree is unable to represent the input data"""
pass
class _ReparseException(Exception):
pass


@@ -0,0 +1,29 @@
from __future__ import absolute_import, division, unicode_literals
from . import base
from collections import OrderedDict
def _attr_key(attr):
"""Return an appropriate key for an attribute for sorting
Attributes have a namespace that can be either ``None`` or a string. We
can't compare the two because they're different types, so we convert
``None`` to an empty string first.
"""
return (attr[0][0] or ''), attr[0][1]
class Filter(base.Filter):
"""Alphabetizes attributes for elements"""
def __iter__(self):
for token in base.Filter.__iter__(self):
if token["type"] in ("StartTag", "EmptyTag"):
attrs = OrderedDict()
for name, value in sorted(token["data"].items(),
key=_attr_key):
attrs[name] = value
token["data"] = attrs
yield token


@@ -1,11 +1,19 @@
from __future__ import absolute_import, division, unicode_literals
from . import base
class Filter(base.Filter):
"""Injects ``<meta charset=ENCODING>`` tag into head of document"""
def __init__(self, source, encoding):
"""Creates a Filter
:arg source: the source token stream
:arg encoding: the encoding to set
"""
base.Filter.__init__(self, source)
self.encoding = encoding
def __iter__(self):
@@ -13,7 +21,7 @@ class Filter(_base.Filter):
meta_found = (self.encoding is None)
pending = []
for token in base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag":
if token["name"].lower() == "head":


@@ -0,0 +1,93 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
from . import base
from ..constants import namespaces, voidElements
from ..constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)
class Filter(base.Filter):
"""Lints the token stream for errors
If it finds any errors, it'll raise an ``AssertionError``.
"""
def __init__(self, source, require_matching_tags=True):
"""Creates a Filter
:arg source: the source token stream
:arg require_matching_tags: whether or not to require matching tags
"""
super(Filter, self).__init__(source)
self.require_matching_tags = require_matching_tags
def __iter__(self):
open_elements = []
for token in base.Filter.__iter__(self):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
namespace = token["namespace"]
name = token["name"]
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
assert isinstance(token["data"], dict)
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
assert type == "EmptyTag"
else:
assert type == "StartTag"
if type == "StartTag" and self.require_matching_tags:
open_elements.append((namespace, name))
for (namespace, name), value in token["data"].items():
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
assert isinstance(value, text_type)
elif type == "EndTag":
namespace = token["namespace"]
name = token["name"]
assert namespace is None or isinstance(namespace, text_type)
assert namespace != ""
assert isinstance(name, text_type)
assert name != ""
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
elif self.require_matching_tags:
start = open_elements.pop()
assert start == (namespace, name)
elif type == "Comment":
data = token["data"]
assert isinstance(data, text_type)
elif type in ("Characters", "SpaceCharacters"):
data = token["data"]
assert isinstance(data, text_type)
assert data != ""
if type == "SpaceCharacters":
assert data.strip(spaceCharacters) == ""
elif type == "Doctype":
name = token["name"]
assert name is None or isinstance(name, text_type)
assert token["publicId"] is None or isinstance(name, text_type)
assert token["systemId"] is None or isinstance(name, text_type)
elif type == "Entity":
assert isinstance(token["name"], text_type)
elif type == "SerializerError":
assert isinstance(token["data"], text_type)
else:
assert False, "Unknown token type: %(type)s" % {"type": type}
yield token
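The `require_matching_tags` bookkeeping in the lint filter reduces to a simple stack: push on `StartTag`, pop and compare on `EndTag`. A minimal standalone sketch with made-up token dicts in the same shape the filter consumes:

```python
# Hypothetical token dicts in html5lib's token-stream shape.
tokens = [
    {"type": "StartTag", "namespace": None, "name": "div"},
    {"type": "EmptyTag", "namespace": None, "name": "br"},
    {"type": "EndTag", "namespace": None, "name": "div"},
]

open_elements = []
for token in tokens:
    if token["type"] == "StartTag":
        open_elements.append((token["namespace"], token["name"]))
    elif token["type"] == "EndTag":
        # the end tag must close the most recently opened element
        assert open_elements.pop() == (token["namespace"], token["name"])

assert open_elements == []  # every start tag was matched
```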


@@ -1,9 +1,10 @@
from __future__ import absolute_import, division, unicode_literals
from . import base
class Filter(base.Filter):
"""Removes optional tags from the token stream"""
def slider(self):
previous1 = previous2 = None
for token in self.source:
@@ -11,7 +12,8 @@ class Filter(_base.Filter):
yield previous2, previous1, token
previous2 = previous1
previous1 = token
if previous1 is not None:
yield previous2, previous1, None
def __iter__(self):
for previous, token, next in self.slider():
@@ -58,7 +60,7 @@ class Filter(_base.Filter):
elif tagname == 'colgroup':
# A colgroup element's start tag may be omitted if the first thing
# inside the colgroup element is a col element, and if the element
# is not immediately preceded by another colgroup element whose
# end tag has been omitted.
if type in ("StartTag", "EmptyTag"):
# XXX: we do not look at the preceding event, so instead we never
@@ -70,7 +72,7 @@ class Filter(_base.Filter):
elif tagname == 'tbody':
# A tbody element's start tag may be omitted if the first thing
# inside the tbody element is a tr element, and if the element is
# not immediately preceded by a tbody, thead, or tfoot element
# whose end tag has been omitted.
if type == "StartTag":
# omit the thead and tfoot elements' end tag when they are
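The guard added to `slider` in this release avoids emitting a spurious `(None, None, None)` triple when the token stream is empty. A standalone sketch of the fixed generator (written as a free function here, for illustration):

```python
def slider(source):
    # Yield (previous2, previous1, token) windows over the stream,
    # ending with a (prev, last, None) triple -- but only when the
    # stream actually produced at least one token.
    previous1 = previous2 = None
    for token in source:
        if previous1 is not None:
            yield previous2, previous1, token
        previous2 = previous1
        previous1 = token
    if previous1 is not None:
        yield previous2, previous1, None

assert list(slider([])) == []                      # no spurious triple
assert list(slider(["a"])) == [(None, "a", None)]
assert list(slider(["a", "b"])) == [(None, "a", "b"), ("a", "b", None)]
```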


@@ -0,0 +1,896 @@
from __future__ import absolute_import, division, unicode_literals
import re
from xml.sax.saxutils import escape, unescape
from six.moves import urllib_parse as urlparse
from . import base
from ..constants import namespaces, prefixes
__all__ = ["Filter"]
allowed_elements = frozenset((
(namespaces['html'], 'a'),
(namespaces['html'], 'abbr'),
(namespaces['html'], 'acronym'),
(namespaces['html'], 'address'),
(namespaces['html'], 'area'),
(namespaces['html'], 'article'),
(namespaces['html'], 'aside'),
(namespaces['html'], 'audio'),
(namespaces['html'], 'b'),
(namespaces['html'], 'big'),
(namespaces['html'], 'blockquote'),
(namespaces['html'], 'br'),
(namespaces['html'], 'button'),
(namespaces['html'], 'canvas'),
(namespaces['html'], 'caption'),
(namespaces['html'], 'center'),
(namespaces['html'], 'cite'),
(namespaces['html'], 'code'),
(namespaces['html'], 'col'),
(namespaces['html'], 'colgroup'),
(namespaces['html'], 'command'),
(namespaces['html'], 'datagrid'),
(namespaces['html'], 'datalist'),
(namespaces['html'], 'dd'),
(namespaces['html'], 'del'),
(namespaces['html'], 'details'),
(namespaces['html'], 'dfn'),
(namespaces['html'], 'dialog'),
(namespaces['html'], 'dir'),
(namespaces['html'], 'div'),
(namespaces['html'], 'dl'),
(namespaces['html'], 'dt'),
(namespaces['html'], 'em'),
(namespaces['html'], 'event-source'),
(namespaces['html'], 'fieldset'),
(namespaces['html'], 'figcaption'),
(namespaces['html'], 'figure'),
(namespaces['html'], 'footer'),
(namespaces['html'], 'font'),
(namespaces['html'], 'form'),
(namespaces['html'], 'header'),
(namespaces['html'], 'h1'),
(namespaces['html'], 'h2'),
(namespaces['html'], 'h3'),
(namespaces['html'], 'h4'),
(namespaces['html'], 'h5'),
(namespaces['html'], 'h6'),
(namespaces['html'], 'hr'),
(namespaces['html'], 'i'),
(namespaces['html'], 'img'),
(namespaces['html'], 'input'),
(namespaces['html'], 'ins'),
(namespaces['html'], 'keygen'),
(namespaces['html'], 'kbd'),
(namespaces['html'], 'label'),
(namespaces['html'], 'legend'),
(namespaces['html'], 'li'),
(namespaces['html'], 'm'),
(namespaces['html'], 'map'),
(namespaces['html'], 'menu'),
(namespaces['html'], 'meter'),
(namespaces['html'], 'multicol'),
(namespaces['html'], 'nav'),
(namespaces['html'], 'nextid'),
(namespaces['html'], 'ol'),
(namespaces['html'], 'output'),
(namespaces['html'], 'optgroup'),
(namespaces['html'], 'option'),
(namespaces['html'], 'p'),
(namespaces['html'], 'pre'),
(namespaces['html'], 'progress'),
(namespaces['html'], 'q'),
(namespaces['html'], 's'),
(namespaces['html'], 'samp'),
(namespaces['html'], 'section'),
(namespaces['html'], 'select'),
(namespaces['html'], 'small'),
(namespaces['html'], 'sound'),
(namespaces['html'], 'source'),
(namespaces['html'], 'spacer'),
(namespaces['html'], 'span'),
(namespaces['html'], 'strike'),
(namespaces['html'], 'strong'),
(namespaces['html'], 'sub'),
(namespaces['html'], 'sup'),
(namespaces['html'], 'table'),
(namespaces['html'], 'tbody'),
(namespaces['html'], 'td'),
(namespaces['html'], 'textarea'),
(namespaces['html'], 'time'),
(namespaces['html'], 'tfoot'),
(namespaces['html'], 'th'),
(namespaces['html'], 'thead'),
(namespaces['html'], 'tr'),
(namespaces['html'], 'tt'),
(namespaces['html'], 'u'),
(namespaces['html'], 'ul'),
(namespaces['html'], 'var'),
(namespaces['html'], 'video'),
(namespaces['mathml'], 'maction'),
(namespaces['mathml'], 'math'),
(namespaces['mathml'], 'merror'),
(namespaces['mathml'], 'mfrac'),
(namespaces['mathml'], 'mi'),
(namespaces['mathml'], 'mmultiscripts'),
(namespaces['mathml'], 'mn'),
(namespaces['mathml'], 'mo'),
(namespaces['mathml'], 'mover'),
(namespaces['mathml'], 'mpadded'),
(namespaces['mathml'], 'mphantom'),
(namespaces['mathml'], 'mprescripts'),
(namespaces['mathml'], 'mroot'),
(namespaces['mathml'], 'mrow'),
(namespaces['mathml'], 'mspace'),
(namespaces['mathml'], 'msqrt'),
(namespaces['mathml'], 'mstyle'),
(namespaces['mathml'], 'msub'),
(namespaces['mathml'], 'msubsup'),
(namespaces['mathml'], 'msup'),
(namespaces['mathml'], 'mtable'),
(namespaces['mathml'], 'mtd'),
(namespaces['mathml'], 'mtext'),
(namespaces['mathml'], 'mtr'),
(namespaces['mathml'], 'munder'),
(namespaces['mathml'], 'munderover'),
(namespaces['mathml'], 'none'),
(namespaces['svg'], 'a'),
(namespaces['svg'], 'animate'),
(namespaces['svg'], 'animateColor'),
(namespaces['svg'], 'animateMotion'),
(namespaces['svg'], 'animateTransform'),
(namespaces['svg'], 'clipPath'),
(namespaces['svg'], 'circle'),
(namespaces['svg'], 'defs'),
(namespaces['svg'], 'desc'),
(namespaces['svg'], 'ellipse'),
(namespaces['svg'], 'font-face'),
(namespaces['svg'], 'font-face-name'),
(namespaces['svg'], 'font-face-src'),
(namespaces['svg'], 'g'),
(namespaces['svg'], 'glyph'),
(namespaces['svg'], 'hkern'),
(namespaces['svg'], 'linearGradient'),
(namespaces['svg'], 'line'),
(namespaces['svg'], 'marker'),
(namespaces['svg'], 'metadata'),
(namespaces['svg'], 'missing-glyph'),
(namespaces['svg'], 'mpath'),
(namespaces['svg'], 'path'),
(namespaces['svg'], 'polygon'),
(namespaces['svg'], 'polyline'),
(namespaces['svg'], 'radialGradient'),
(namespaces['svg'], 'rect'),
(namespaces['svg'], 'set'),
(namespaces['svg'], 'stop'),
(namespaces['svg'], 'svg'),
(namespaces['svg'], 'switch'),
(namespaces['svg'], 'text'),
(namespaces['svg'], 'title'),
(namespaces['svg'], 'tspan'),
(namespaces['svg'], 'use'),
))
allowed_attributes = frozenset((
# HTML attributes
(None, 'abbr'),
(None, 'accept'),
(None, 'accept-charset'),
(None, 'accesskey'),
(None, 'action'),
(None, 'align'),
(None, 'alt'),
(None, 'autocomplete'),
(None, 'autofocus'),
(None, 'axis'),
(None, 'background'),
(None, 'balance'),
(None, 'bgcolor'),
(None, 'bgproperties'),
(None, 'border'),
(None, 'bordercolor'),
(None, 'bordercolordark'),
(None, 'bordercolorlight'),
(None, 'bottompadding'),
(None, 'cellpadding'),
(None, 'cellspacing'),
(None, 'ch'),
(None, 'challenge'),
(None, 'char'),
(None, 'charoff'),
(None, 'choff'),
(None, 'charset'),
(None, 'checked'),
(None, 'cite'),
(None, 'class'),
(None, 'clear'),
(None, 'color'),
(None, 'cols'),
(None, 'colspan'),
(None, 'compact'),
(None, 'contenteditable'),
(None, 'controls'),
(None, 'coords'),
(None, 'data'),
(None, 'datafld'),
(None, 'datapagesize'),
(None, 'datasrc'),
(None, 'datetime'),
(None, 'default'),
(None, 'delay'),
(None, 'dir'),
(None, 'disabled'),
(None, 'draggable'),
(None, 'dynsrc'),
(None, 'enctype'),
(None, 'end'),
(None, 'face'),
(None, 'for'),
(None, 'form'),
(None, 'frame'),
(None, 'galleryimg'),
(None, 'gutter'),
(None, 'headers'),
(None, 'height'),
(None, 'hidefocus'),
(None, 'hidden'),
(None, 'high'),
(None, 'href'),
(None, 'hreflang'),
(None, 'hspace'),
(None, 'icon'),
(None, 'id'),
(None, 'inputmode'),
(None, 'ismap'),
(None, 'keytype'),
(None, 'label'),
(None, 'leftspacing'),
(None, 'lang'),
(None, 'list'),
(None, 'longdesc'),
(None, 'loop'),
(None, 'loopcount'),
(None, 'loopend'),
(None, 'loopstart'),
(None, 'low'),
(None, 'lowsrc'),
(None, 'max'),
(None, 'maxlength'),
(None, 'media'),
(None, 'method'),
(None, 'min'),
(None, 'multiple'),
(None, 'name'),
(None, 'nohref'),
(None, 'noshade'),
(None, 'nowrap'),
(None, 'open'),
(None, 'optimum'),
(None, 'pattern'),
(None, 'ping'),
(None, 'point-size'),
(None, 'poster'),
(None, 'pqg'),
(None, 'preload'),
(None, 'prompt'),
(None, 'radiogroup'),
(None, 'readonly'),
(None, 'rel'),
(None, 'repeat-max'),
(None, 'repeat-min'),
(None, 'replace'),
(None, 'required'),
(None, 'rev'),
(None, 'rightspacing'),
(None, 'rows'),
(None, 'rowspan'),
(None, 'rules'),
(None, 'scope'),
(None, 'selected'),
(None, 'shape'),
(None, 'size'),
(None, 'span'),
(None, 'src'),
(None, 'start'),
(None, 'step'),
(None, 'style'),
(None, 'summary'),
(None, 'suppress'),
(None, 'tabindex'),
(None, 'target'),
(None, 'template'),
(None, 'title'),
(None, 'toppadding'),
(None, 'type'),
(None, 'unselectable'),
(None, 'usemap'),
(None, 'urn'),
(None, 'valign'),
(None, 'value'),
(None, 'variable'),
(None, 'volume'),
(None, 'vspace'),
(None, 'vrml'),
(None, 'width'),
(None, 'wrap'),
(namespaces['xml'], 'lang'),
# MathML attributes
(None, 'actiontype'),
(None, 'align'),
(None, 'columnalign'),
(None, 'columnlines'),
(None, 'columnspacing'),
(None, 'columnspan'),
(None, 'depth'),
(None, 'display'),
(None, 'displaystyle'),
(None, 'equalcolumns'),
(None, 'equalrows'),
(None, 'fence'),
(None, 'fontstyle'),
(None, 'fontweight'),
(None, 'frame'),
(None, 'height'),
(None, 'linethickness'),
(None, 'lspace'),
(None, 'mathbackground'),
(None, 'mathcolor'),
(None, 'mathvariant'),
(None, 'maxsize'),
(None, 'minsize'),
(None, 'other'),
(None, 'rowalign'),
(None, 'rowlines'),
(None, 'rowspacing'),
(None, 'rowspan'),
(None, 'rspace'),
(None, 'scriptlevel'),
(None, 'selection'),
(None, 'separator'),
(None, 'stretchy'),
(None, 'width'),
(namespaces['xlink'], 'href'),
(namespaces['xlink'], 'show'),
(namespaces['xlink'], 'type'),
# SVG attributes
(None, 'accent-height'),
(None, 'accumulate'),
(None, 'additive'),
(None, 'alphabetic'),
(None, 'arabic-form'),
(None, 'ascent'),
(None, 'attributeName'),
(None, 'attributeType'),
(None, 'baseProfile'),
(None, 'bbox'),
(None, 'begin'),
(None, 'by'),
(None, 'calcMode'),
(None, 'cap-height'),
(None, 'class'),
(None, 'clip-path'),
(None, 'color'),
(None, 'color-rendering'),
(None, 'content'),
(None, 'cx'),
(None, 'cy'),
(None, 'd'),
(None, 'dx'),
(None, 'dy'),
(None, 'descent'),
(None, 'display'),
(None, 'dur'),
(None, 'end'),
(None, 'fill'),
(None, 'fill-opacity'),
(None, 'fill-rule'),
(None, 'font-family'),
(None, 'font-size'),
(None, 'font-stretch'),
(None, 'font-style'),
(None, 'font-variant'),
(None, 'font-weight'),
(None, 'from'),
(None, 'fx'),
(None, 'fy'),
(None, 'g1'),
(None, 'g2'),
(None, 'glyph-name'),
(None, 'gradientUnits'),
(None, 'hanging'),
(None, 'height'),
(None, 'horiz-adv-x'),
(None, 'horiz-origin-x'),
(None, 'id'),
(None, 'ideographic'),
(None, 'k'),
(None, 'keyPoints'),
(None, 'keySplines'),
(None, 'keyTimes'),
(None, 'lang'),
(None, 'marker-end'),
(None, 'marker-mid'),
(None, 'marker-start'),
(None, 'markerHeight'),
(None, 'markerUnits'),
(None, 'markerWidth'),
(None, 'mathematical'),
(None, 'max'),
(None, 'min'),
(None, 'name'),
(None, 'offset'),
(None, 'opacity'),
(None, 'orient'),
(None, 'origin'),
(None, 'overline-position'),
(None, 'overline-thickness'),
(None, 'panose-1'),
(None, 'path'),
(None, 'pathLength'),
(None, 'points'),
(None, 'preserveAspectRatio'),
(None, 'r'),
(None, 'refX'),
(None, 'refY'),
(None, 'repeatCount'),
(None, 'repeatDur'),
(None, 'requiredExtensions'),
(None, 'requiredFeatures'),
(None, 'restart'),
(None, 'rotate'),
(None, 'rx'),
(None, 'ry'),
(None, 'slope'),
(None, 'stemh'),
(None, 'stemv'),
(None, 'stop-color'),
(None, 'stop-opacity'),
(None, 'strikethrough-position'),
(None, 'strikethrough-thickness'),
(None, 'stroke'),
(None, 'stroke-dasharray'),
(None, 'stroke-dashoffset'),
(None, 'stroke-linecap'),
(None, 'stroke-linejoin'),
(None, 'stroke-miterlimit'),
(None, 'stroke-opacity'),
(None, 'stroke-width'),
(None, 'systemLanguage'),
(None, 'target'),
(None, 'text-anchor'),
(None, 'to'),
(None, 'transform'),
(None, 'type'),
(None, 'u1'),
(None, 'u2'),
(None, 'underline-position'),
(None, 'underline-thickness'),
(None, 'unicode'),
(None, 'unicode-range'),
(None, 'units-per-em'),
(None, 'values'),
(None, 'version'),
(None, 'viewBox'),
(None, 'visibility'),
(None, 'width'),
(None, 'widths'),
(None, 'x'),
(None, 'x-height'),
(None, 'x1'),
(None, 'x2'),
(namespaces['xlink'], 'actuate'),
(namespaces['xlink'], 'arcrole'),
(namespaces['xlink'], 'href'),
(namespaces['xlink'], 'role'),
(namespaces['xlink'], 'show'),
(namespaces['xlink'], 'title'),
(namespaces['xlink'], 'type'),
(namespaces['xml'], 'base'),
(namespaces['xml'], 'lang'),
(namespaces['xml'], 'space'),
(None, 'y'),
(None, 'y1'),
(None, 'y2'),
(None, 'zoomAndPan'),
))
attr_val_is_uri = frozenset((
(None, 'href'),
(None, 'src'),
(None, 'cite'),
(None, 'action'),
(None, 'longdesc'),
(None, 'poster'),
(None, 'background'),
(None, 'datasrc'),
(None, 'dynsrc'),
(None, 'lowsrc'),
(None, 'ping'),
(namespaces['xlink'], 'href'),
(namespaces['xml'], 'base'),
))
svg_attr_val_allows_ref = frozenset((
(None, 'clip-path'),
(None, 'color-profile'),
(None, 'cursor'),
(None, 'fill'),
(None, 'filter'),
(None, 'marker'),
(None, 'marker-start'),
(None, 'marker-mid'),
(None, 'marker-end'),
(None, 'mask'),
(None, 'stroke'),
))
svg_allow_local_href = frozenset((
(None, 'altGlyph'),
(None, 'animate'),
(None, 'animateColor'),
(None, 'animateMotion'),
(None, 'animateTransform'),
(None, 'cursor'),
(None, 'feImage'),
(None, 'filter'),
(None, 'linearGradient'),
(None, 'pattern'),
(None, 'radialGradient'),
(None, 'textpath'),
(None, 'tref'),
(None, 'set'),
(None, 'use')
))
allowed_css_properties = frozenset((
'azimuth',
'background-color',
'border-bottom-color',
'border-collapse',
'border-color',
'border-left-color',
'border-right-color',
'border-top-color',
'clear',
'color',
'cursor',
'direction',
'display',
'elevation',
'float',
'font',
'font-family',
'font-size',
'font-style',
'font-variant',
'font-weight',
'height',
'letter-spacing',
'line-height',
'overflow',
'pause',
'pause-after',
'pause-before',
'pitch',
'pitch-range',
'richness',
'speak',
'speak-header',
'speak-numeral',
'speak-punctuation',
'speech-rate',
'stress',
'text-align',
'text-decoration',
'text-indent',
'unicode-bidi',
'vertical-align',
'voice-family',
'volume',
'white-space',
'width',
))
allowed_css_keywords = frozenset((
'auto',
'aqua',
'black',
'block',
'blue',
'bold',
'both',
'bottom',
'brown',
'center',
'collapse',
'dashed',
'dotted',
'fuchsia',
'gray',
'green',
'!important',
'italic',
'left',
'lime',
'maroon',
'medium',
'none',
'navy',
'normal',
'nowrap',
'olive',
'pointer',
'purple',
'red',
'right',
'solid',
'silver',
'teal',
'top',
'transparent',
'underline',
'white',
'yellow',
))
allowed_svg_properties = frozenset((
'fill',
'fill-opacity',
'fill-rule',
'stroke',
'stroke-width',
'stroke-linecap',
'stroke-linejoin',
'stroke-opacity',
))
allowed_protocols = frozenset((
'ed2k',
'ftp',
'http',
'https',
'irc',
'mailto',
'news',
'gopher',
'nntp',
'telnet',
'webcal',
'xmpp',
'callto',
'feed',
'urn',
'aim',
'rsync',
'tag',
'ssh',
'sftp',
'rtsp',
'afs',
'data',
))
allowed_content_types = frozenset((
'image/png',
'image/jpeg',
'image/gif',
'image/webp',
'image/bmp',
'text/plain',
))
data_content_type = re.compile(r'''
^
# Match a content type <application>/<type>
(?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
# Match any character set and encoding
(?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
|(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
# Assume the rest is data
,.*
$
''',
re.VERBOSE)
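For reference, the `data_content_type` pattern above can be exercised on its own. A minimal, self-contained sketch (the pattern is copied verbatim from the module; the sample payloads are made up):

```python
import re

# Copy of the sanitizer's data: URI content-type pattern, for standalone use.
data_content_type = re.compile(r'''
    ^
    # Match a content type <application>/<type>
    (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
    # Match any character set and encoding
    (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
      |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
    # Assume the rest is data
    ,.*
    $
    ''', re.VERBOSE)

# A base64 image payload matches and exposes its content type...
m = data_content_type.match("image/png;base64,iVBORw0KGgo=")
assert m is not None and m.group("content_type") == "image/png"
# ...while a path with no comma-separated payload does not match at all.
assert data_content_type.match("image/png") is None
```

The filter applies this to `uri.path` of a parsed `data:` URI and drops the attribute when the match fails or the content type is not allow-listed.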
class Filter(base.Filter):
"""Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
def __init__(self,
source,
allowed_elements=allowed_elements,
allowed_attributes=allowed_attributes,
allowed_css_properties=allowed_css_properties,
allowed_css_keywords=allowed_css_keywords,
allowed_svg_properties=allowed_svg_properties,
allowed_protocols=allowed_protocols,
allowed_content_types=allowed_content_types,
attr_val_is_uri=attr_val_is_uri,
svg_attr_val_allows_ref=svg_attr_val_allows_ref,
svg_allow_local_href=svg_allow_local_href):
"""Creates a Filter
:arg allowed_elements: set of elements to allow--everything else will
be escaped
:arg allowed_attributes: set of attributes to allow in
elements--everything else will be stripped
:arg allowed_css_properties: set of CSS properties to allow--everything
else will be stripped
:arg allowed_css_keywords: set of CSS keywords to allow--everything
else will be stripped
:arg allowed_svg_properties: set of SVG properties to allow--everything
else will be removed
:arg allowed_protocols: set of allowed protocols for URIs
:arg allowed_content_types: set of allowed content types for ``data`` URIs.
:arg attr_val_is_uri: set of attributes that have URI values--values
that have a scheme not listed in ``allowed_protocols`` are removed
:arg svg_attr_val_allows_ref: set of SVG attributes that can have
references
:arg svg_allow_local_href: set of SVG elements that can have local
hrefs--these are removed
"""
super(Filter, self).__init__(source)
self.allowed_elements = allowed_elements
self.allowed_attributes = allowed_attributes
self.allowed_css_properties = allowed_css_properties
self.allowed_css_keywords = allowed_css_keywords
self.allowed_svg_properties = allowed_svg_properties
self.allowed_protocols = allowed_protocols
self.allowed_content_types = allowed_content_types
self.attr_val_is_uri = attr_val_is_uri
self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
self.svg_allow_local_href = svg_allow_local_href
def __iter__(self):
for token in base.Filter.__iter__(self):
token = self.sanitize_token(token)
if token:
yield token
# Sanitize the html, escaping all elements not in ALLOWED_ELEMENTS, and
# stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
# are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
# ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
# are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
# allowed.
#
# sanitize_html('<script> do_nasty_stuff() </script>')
# => &lt;script> do_nasty_stuff() &lt;/script>
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
# => <a>Click here for $100</a>
def sanitize_token(self, token):
# accommodate filters which use token_type differently
token_type = token["type"]
if token_type in ("StartTag", "EndTag", "EmptyTag"):
name = token["name"]
namespace = token["namespace"]
if ((namespace, name) in self.allowed_elements or
(namespace is None and
(namespaces["html"], name) in self.allowed_elements)):
return self.allowed_token(token)
else:
return self.disallowed_token(token)
elif token_type == "Comment":
pass
else:
return token
def allowed_token(self, token):
if "data" in token:
attrs = token["data"]
attr_names = set(attrs.keys())
# Remove forbidden attributes
for to_remove in (attr_names - self.allowed_attributes):
del token["data"][to_remove]
attr_names.remove(to_remove)
# Remove attributes with disallowed URL values
for attr in (attr_names & self.attr_val_is_uri):
assert attr in attrs
# I don't have a clue where this regexp comes from or why it matches those
# characters, nor why we call unescape. I just know it's always been here.
# Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
# this will do is remove *more* than it otherwise would.
val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
unescape(attrs[attr])).lower()
# remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace("\ufffd", "")
try:
uri = urlparse.urlparse(val_unescaped)
except ValueError:
uri = None
del attrs[attr]
if uri and uri.scheme:
if uri.scheme not in self.allowed_protocols:
del attrs[attr]
if uri.scheme == 'data':
m = data_content_type.match(uri.path)
if not m:
del attrs[attr]
elif m.group('content_type') not in self.allowed_content_types:
del attrs[attr]
for attr in self.svg_attr_val_allows_ref:
if attr in attrs:
attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
' ',
unescape(attrs[attr]))
if (token["name"] in self.svg_allow_local_href and
(namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*',
attrs[(namespaces['xlink'], 'href')])):
del attrs[(namespaces['xlink'], 'href')]
if (None, 'style') in attrs:
attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
token["data"] = attrs
return token
def disallowed_token(self, token):
token_type = token["type"]
if token_type == "EndTag":
token["data"] = "</%s>" % token["name"]
elif token["data"]:
assert token_type in ("StartTag", "EmptyTag")
attrs = []
for (ns, name), v in token["data"].items():
attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
else:
token["data"] = "<%s>" % token["name"]
if token.get("selfClosing"):
token["data"] = token["data"][:-1] + "/>"
token["type"] = "Characters"
del token["name"]
return token
def sanitize_css(self, style):
# disallow urls
style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)
# gauntlet
if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
return ''
if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
return ''
clean = []
for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
if not value:
continue
if prop.lower() in self.allowed_css_properties:
clean.append(prop + ': ' + value + ';')
elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
'padding']:
for keyword in value.split():
if keyword not in self.allowed_css_keywords and \
not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa
break
else:
clean.append(prop + ': ' + value + ';')
elif prop.lower() in self.allowed_svg_properties:
clean.append(prop + ': ' + value + ';')
return ' '.join(clean)
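As a rough illustration of the allow-list step in `sanitize_css` above, here is a hedged, standalone sketch. It uses a made-up two-property allow-list and deliberately omits the "gauntlet" regexes and the shorthand-keyword path, so it is looser than the real filter:

```python
import re

# Hypothetical allow-list for the sketch only; the real filter uses
# allowed_css_properties and allowed_css_keywords.
ALLOWED = {"color", "width"}

def sanitize_css_sketch(style):
    # Disallow url(...) references, as the filter does first.
    style = re.sub(r'url\s*\(\s*[^\s)]+?\s*\)\s*', ' ', style)
    # Keep only input made of simple "prop: value" pairs.
    if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
        return ''
    clean = []
    for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
        if value and prop.lower() in ALLOWED:
            clean.append(prop + ': ' + value + ';')
    return ' '.join(clean)

print(sanitize_css_sketch("color: red; background-image: url(javascript:alert(1))"))
```

Note the full filter is stricter: its gauntlet regex rejects stray characters (such as the `)` left behind by the lazy `url(...)` substitution), so the real `sanitize_css` can return the empty string where this sketch still salvages allowed properties.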


@@ -2,20 +2,20 @@ from __future__ import absolute_import, division, unicode_literals
 import re
-from . import _base
+from . import base
 from ..constants import rcdataElements, spaceCharacters
 spaceCharacters = "".join(spaceCharacters)
 SPACES_REGEX = re.compile("[%s]+" % spaceCharacters)
-class Filter(_base.Filter):
+class Filter(base.Filter):
+    """Collapses whitespace except in pre, textarea, and script elements"""
     spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
     def __iter__(self):
         preserve = 0
-        for token in _base.Filter.__iter__(self):
+        for token in base.Filter.__iter__(self):
             type = token["type"]
             if type == "StartTag" \
                     and (preserve or token["name"] in self.spacePreserveElements):

(Diff between files not shown because of its large size.)


@@ -0,0 +1,409 @@
from __future__ import absolute_import, division, unicode_literals
from six import text_type
import re
from codecs import register_error, xmlcharrefreplace_errors
from .constants import voidElements, booleanAttributes, spaceCharacters
from .constants import rcdataElements, entities, xmlEntities
from . import treewalkers, _utils
from xml.sax.saxutils import escape
_quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`"
_quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]")
_quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars +
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
"\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
"\u3000]")
_encode_entity_map = {}
_is_ucs4 = len("\U0010FFFF") == 1
for k, v in list(entities.items()):
# skip multi-character entities
if ((_is_ucs4 and len(v) > 1) or
(not _is_ucs4 and len(v) > 2)):
continue
if v != "&":
if len(v) == 2:
v = _utils.surrogatePairToCodepoint(v)
else:
v = ord(v)
if v not in _encode_entity_map or k.islower():
# prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
_encode_entity_map[v] = k
def htmlentityreplace_errors(exc):
if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
res = []
codepoints = []
skip = False
for i, c in enumerate(exc.object[exc.start:exc.end]):
if skip:
skip = False
continue
index = i + exc.start
if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]):
codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2])
skip = True
else:
codepoint = ord(c)
codepoints.append(codepoint)
for cp in codepoints:
e = _encode_entity_map.get(cp)
if e:
res.append("&")
res.append(e)
if not e.endswith(";"):
res.append(";")
else:
res.append("&#x%s;" % (hex(cp)[2:]))
return ("".join(res), exc.end)
else:
return xmlcharrefreplace_errors(exc)
register_error("htmlentityreplace", htmlentityreplace_errors)
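The registration above follows the standard `codecs` error-handler protocol: a handler receives the `UnicodeEncodeError` and returns a `(replacement, resume_position)` tuple. A hedged toy handler (hypothetical name, numeric references only) illustrating the same shape:

```python
import codecs

def toyentityreplace_errors(exc):
    # Replace each unencodable character with a hex character reference,
    # mirroring only the numeric-fallback branch of htmlentityreplace_errors.
    replacement = "".join("&#x%x;" % ord(c) for c in exc.object[exc.start:exc.end])
    return (replacement, exc.end)

codecs.register_error("toyentityreplace", toyentityreplace_errors)

assert "caf\u00e9".encode("ascii", "toyentityreplace") == b"caf&#xe9;"
```

The real handler additionally prefers named entities from `_encode_entity_map` and handles surrogate pairs on narrow (UCS-2) builds before falling back to numeric references.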
def serialize(input, tree="etree", encoding=None, **serializer_opts):
"""Serializes the input token stream using the specified treewalker
:arg input: the token stream to serialize
:arg tree: the treewalker to use
:arg encoding: the encoding to use
:arg serializer_opts: any options to pass to the
:py:class:`html5lib.serializer.HTMLSerializer` that gets created
:returns: the tree serialized as a string
Example:
>>> from html5lib.html5parser import parse
>>> from html5lib.serializer import serialize
>>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
>>> serialize(token_stream, omit_optional_tags=False)
'<html><head></head><body><p>Hi!</p></body></html>'
"""
# XXX: Should we cache this?
walker = treewalkers.getTreeWalker(tree)
s = HTMLSerializer(**serializer_opts)
return s.render(walker(input), encoding)
class HTMLSerializer(object):
# attribute quoting options
quote_attr_values = "legacy" # be secure by default
quote_char = '"'
use_best_quote_char = True
# tag syntax options
omit_optional_tags = True
minimize_boolean_attributes = True
use_trailing_solidus = False
space_before_trailing_solidus = True
# escaping options
escape_lt_in_attrs = False
escape_rcdata = False
resolve_entities = True
# miscellaneous options
alphabetical_attributes = False
inject_meta_charset = True
strip_whitespace = False
sanitize = False
options = ("quote_attr_values", "quote_char", "use_best_quote_char",
"omit_optional_tags", "minimize_boolean_attributes",
"use_trailing_solidus", "space_before_trailing_solidus",
"escape_lt_in_attrs", "escape_rcdata", "resolve_entities",
"alphabetical_attributes", "inject_meta_charset",
"strip_whitespace", "sanitize")
def __init__(self, **kwargs):
"""Initialize HTMLSerializer
:arg inject_meta_charset: Whether or not to inject the meta charset.
Defaults to ``True``.
:arg quote_attr_values: Whether to quote attribute values that don't
require quoting per legacy browser behavior (``"legacy"``), when
required by the standard (``"spec"``), or always (``"always"``).
Defaults to ``"legacy"``.
:arg quote_char: Use given quote character for attribute quoting.
Defaults to ``"`` which will use double quotes unless attribute
value contains a double quote, in which case single quotes are
used.
:arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
values.
Defaults to ``False``.
:arg escape_rcdata: Whether to escape characters that need to be
escaped within normal elements within rcdata elements such as
style.
Defaults to ``False``.
:arg resolve_entities: Whether to resolve named character entities that
appear in the source tree. The XML predefined entities &lt; &gt;
&amp; &quot; &apos; are unaffected by this setting.
Defaults to ``True``.
:arg strip_whitespace: Whether to remove semantically meaningless
whitespace. (This compresses all whitespace to a single space
except within ``pre``.)
Defaults to ``False``.
:arg minimize_boolean_attributes: Shortens boolean attributes to give
just the attribute value, for example::
<input disabled="disabled">
becomes::
<input disabled>
Defaults to ``True``.
:arg use_trailing_solidus: Includes a close-tag slash at the end of the
start tag of void elements (empty elements whose end tag is
forbidden). E.g. ``<hr/>``.
Defaults to ``False``.
:arg space_before_trailing_solidus: Places a space immediately before
the closing slash in a tag using a trailing solidus. E.g.
``<hr />``. Requires ``use_trailing_solidus=True``.
Defaults to ``True``.
:arg sanitize: Strip all unsafe or unknown constructs from output.
See :py:class:`html5lib.filters.sanitizer.Filter`.
Defaults to ``False``.
:arg omit_optional_tags: Omit start/end tags that are optional.
Defaults to ``True``.
:arg alphabetical_attributes: Reorder attributes to be in alphabetical order.
Defaults to ``False``.
"""
unexpected_args = frozenset(kwargs) - frozenset(self.options)
if len(unexpected_args) > 0:
raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args)))
if 'quote_char' in kwargs:
self.use_best_quote_char = False
for attr in self.options:
setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
self.errors = []
self.strict = False
def encode(self, string):
assert(isinstance(string, text_type))
if self.encoding:
return string.encode(self.encoding, "htmlentityreplace")
else:
return string
def encodeStrict(self, string):
assert(isinstance(string, text_type))
if self.encoding:
return string.encode(self.encoding, "strict")
else:
return string
def serialize(self, treewalker, encoding=None):
# pylint:disable=too-many-nested-blocks
self.encoding = encoding
in_cdata = False
self.errors = []
if encoding and self.inject_meta_charset:
from .filters.inject_meta_charset import Filter
treewalker = Filter(treewalker, encoding)
# Alphabetical attributes is here under the assumption that none of
# the later filters add or change order of attributes; it needs to be
# before the sanitizer so escaped elements come out correctly
if self.alphabetical_attributes:
from .filters.alphabeticalattributes import Filter
treewalker = Filter(treewalker)
# WhitespaceFilter should be used before OptionalTagFilter
# for maximum efficiency of the latter filter
if self.strip_whitespace:
from .filters.whitespace import Filter
treewalker = Filter(treewalker)
if self.sanitize:
from .filters.sanitizer import Filter
treewalker = Filter(treewalker)
if self.omit_optional_tags:
from .filters.optionaltags import Filter
treewalker = Filter(treewalker)
for token in treewalker:
type = token["type"]
if type == "Doctype":
doctype = "<!DOCTYPE %s" % token["name"]
if token["publicId"]:
doctype += ' PUBLIC "%s"' % token["publicId"]
elif token["systemId"]:
doctype += " SYSTEM"
if token["systemId"]:
if token["systemId"].find('"') >= 0:
if token["systemId"].find("'") >= 0:
self.serializeError("System identifier contains both single and double quote characters")
quote_char = "'"
else:
quote_char = '"'
doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)
doctype += ">"
yield self.encodeStrict(doctype)
elif type in ("Characters", "SpaceCharacters"):
if type == "SpaceCharacters" or in_cdata:
if in_cdata and token["data"].find("</") >= 0:
self.serializeError("Unexpected </ in CDATA")
yield self.encode(token["data"])
else:
yield self.encode(escape(token["data"]))
elif type in ("StartTag", "EmptyTag"):
name = token["name"]
yield self.encodeStrict("<%s" % name)
if name in rcdataElements and not self.escape_rcdata:
in_cdata = True
elif in_cdata:
self.serializeError("Unexpected child element of a CDATA element")
for (_, attr_name), attr_value in token["data"].items():
# TODO: Add namespace support here
k = attr_name
v = attr_value
yield self.encodeStrict(' ')
yield self.encodeStrict(k)
if not self.minimize_boolean_attributes or \
(k not in booleanAttributes.get(name, tuple()) and
k not in booleanAttributes.get("", tuple())):
yield self.encodeStrict("=")
if self.quote_attr_values == "always" or len(v) == 0:
quote_attr = True
elif self.quote_attr_values == "spec":
quote_attr = _quoteAttributeSpec.search(v) is not None
elif self.quote_attr_values == "legacy":
quote_attr = _quoteAttributeLegacy.search(v) is not None
else:
raise ValueError("quote_attr_values must be one of: "
"'always', 'spec', or 'legacy'")
v = v.replace("&", "&amp;")
if self.escape_lt_in_attrs:
v = v.replace("<", "&lt;")
if quote_attr:
quote_char = self.quote_char
if self.use_best_quote_char:
if "'" in v and '"' not in v:
quote_char = '"'
elif '"' in v and "'" not in v:
quote_char = "'"
if quote_char == "'":
v = v.replace("'", "&#39;")
else:
v = v.replace('"', "&quot;")
yield self.encodeStrict(quote_char)
yield self.encode(v)
yield self.encodeStrict(quote_char)
else:
yield self.encode(v)
if name in voidElements and self.use_trailing_solidus:
if self.space_before_trailing_solidus:
yield self.encodeStrict(" /")
else:
yield self.encodeStrict("/")
yield self.encode(">")
elif type == "EndTag":
name = token["name"]
if name in rcdataElements:
in_cdata = False
elif in_cdata:
self.serializeError("Unexpected child element of a CDATA element")
yield self.encodeStrict("</%s>" % name)
elif type == "Comment":
data = token["data"]
if data.find("--") >= 0:
self.serializeError("Comment contains --")
yield self.encodeStrict("<!--%s-->" % token["data"])
elif type == "Entity":
name = token["name"]
key = name + ";"
if key not in entities:
self.serializeError("Entity %s not recognized" % name)
if self.resolve_entities and key not in xmlEntities:
data = entities[key]
else:
data = "&%s;" % name
yield self.encodeStrict(data)
else:
self.serializeError(token["data"])
def render(self, treewalker, encoding=None):
"""Serializes the stream from the treewalker into a string
:arg treewalker: the treewalker to serialize
:arg encoding: the string encoding to use
:returns: the serialized tree
Example:
>>> from html5lib import parse, getTreeWalker
>>> from html5lib.serializer import HTMLSerializer
>>> token_stream = parse('<html><body>Hi!</body></html>')
>>> walker = getTreeWalker('etree')
>>> serializer = HTMLSerializer(omit_optional_tags=False)
>>> serializer.render(walker(token_stream))
'<html><head></head><body>Hi!</body></html>'
"""
if encoding:
return b"".join(list(self.serialize(treewalker, encoding)))
else:
return "".join(list(self.serialize(treewalker)))
def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
# XXX The idea is to make data mandatory.
self.errors.append(data)
if self.strict:
raise SerializeError
class SerializeError(Exception):
"""Error in serialized tree"""
pass
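The attribute-quoting decision in `HTMLSerializer.serialize` can be summarized in isolation. A hedged sketch of the `use_best_quote_char` branch (the helper name is made up for illustration):

```python
def pick_quote_char(value, quote_char='"', use_best_quote_char=True):
    # Mirror of the use_best_quote_char logic in HTMLSerializer.serialize:
    # prefer whichever quote character does not appear in the attribute value;
    # otherwise fall back to the configured quote_char and escape occurrences.
    if use_best_quote_char:
        if "'" in value and '"' not in value:
            return '"'
        if '"' in value and "'" not in value:
            return "'"
    return quote_char

assert pick_quote_char("it's fine") == '"'
assert pick_quote_char('say "hi"') == "'"
assert pick_quote_char("plain") == '"'
```

When both quote characters appear in the value, the serializer keeps the configured `quote_char` and escapes it inside the value (`&#39;` or `&quot;`), as the surrounding code shows.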


@@ -0,0 +1,108 @@
from __future__ import print_function
import os.path
import sys
import pkg_resources
import pytest
from .tree_construction import TreeConstructionFile
from .tokenizer import TokenizerFile
from .sanitizer import SanitizerFile
_dir = os.path.abspath(os.path.dirname(__file__))
_root = os.path.join(_dir, "..", "..")
_testdata = os.path.join(_dir, "testdata")
_tree_construction = os.path.join(_testdata, "tree-construction")
_tokenizer = os.path.join(_testdata, "tokenizer")
_sanitizer_testdata = os.path.join(_dir, "sanitizer-testdata")
def fail_if_missing_pytest_expect():
"""Throws an exception halting pytest if pytest-expect isn't working"""
try:
from pytest_expect import expect # noqa
except ImportError:
header = '*' * 78
print(
'\n' +
header + '\n' +
'ERROR: Either pytest-expect or its dependency u-msgpack-python is not\n' +
'installed. Please install them both before running pytest.\n' +
header + '\n',
file=sys.stderr
)
raise
fail_if_missing_pytest_expect()
def pytest_configure(config):
msgs = []
if not os.path.exists(_testdata):
msg = "testdata not available! "
if os.path.exists(os.path.join(_root, ".git")):
msg += ("Please run git submodule update --init --recursive " +
"and then run tests again.")
else:
msg += ("The testdata doesn't appear to be included with this package, " +
"so finding the right version will be hard. :(")
msgs.append(msg)
if config.option.update_xfail:
# Check for optional requirements
req_file = os.path.join(_root, "requirements-optional.txt")
if os.path.exists(req_file):
with open(req_file, "r") as fp:
for line in fp:
if (line.strip() and
not (line.startswith("-r") or
line.startswith("#"))):
if ";" in line:
spec, marker = line.strip().split(";", 1)
else:
spec, marker = line.strip(), None
req = pkg_resources.Requirement.parse(spec)
if marker and not pkg_resources.evaluate_marker(marker):
msgs.append("%s not available in this environment" % spec)
else:
try:
installed = pkg_resources.working_set.find(req)
except pkg_resources.VersionConflict:
msgs.append("Outdated version of %s installed, need %s" % (req.name, spec))
else:
if not installed:
msgs.append("Need %s" % spec)
# Check cElementTree
import xml.etree.ElementTree as ElementTree
try:
import xml.etree.cElementTree as cElementTree
except ImportError:
msgs.append("cElementTree unable to be imported")
else:
if cElementTree.Element is ElementTree.Element:
msgs.append("cElementTree is just an alias for ElementTree")
if msgs:
pytest.exit("\n".join(msgs))
def pytest_collect_file(path, parent):
dir = os.path.abspath(path.dirname)
dir_and_parents = set()
while dir not in dir_and_parents:
dir_and_parents.add(dir)
dir = os.path.dirname(dir)
if _tree_construction in dir_and_parents:
if path.ext == ".dat":
return TreeConstructionFile(path, parent)
elif _tokenizer in dir_and_parents:
if path.ext == ".test":
return TokenizerFile(path, parent)
elif _sanitizer_testdata in dir_and_parents:
if path.ext == ".dat":
return SanitizerFile(path, parent)


@@ -0,0 +1,433 @@
[
{
"name": "IE_Comments",
"input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
"output": ""
},
{
"name": "IE_Comments_2",
"input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
"output": "&lt;script&gt;alert('XSS');&lt;/script&gt;"
},
{
"name": "allow_colons_in_path_component",
"input": "<a href=\"./this:that\">foo</a>",
"output": "<a href='./this:that'>foo</a>"
},
{
"name": "background_attribute",
"input": "<div background=\"javascript:alert('XSS')\"></div>",
"output": "<div></div>"
},
{
"name": "bgsound",
"input": "<bgsound src=\"javascript:alert('XSS');\" />",
"output": "&lt;bgsound src=\"javascript:alert('XSS');\"&gt;&lt;/bgsound&gt;"
},
{
"name": "div_background_image_unicode_encoded",
"input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
"output": "<div style=''>foo</div>"
},
{
"name": "div_expression",
"input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
"output": "<div style=''>foo</div>"
},
{
"name": "double_open_angle_brackets",
"input": "<img src=http://ha.ckers.org/scriptlet.html <",
"output": ""
},
{
"name": "double_open_angle_brackets_2",
"input": "<script src=http://ha.ckers.org/scriptlet.html <",
"output": ""
},
{
"name": "grave_accents",
"input": "<img src=`javascript:alert('XSS')` />",
"output": "<img/>"
},
{
"name": "img_dynsrc_lowsrc",
"input": "<img dynsrc=\"javascript:alert('XSS')\" />",
"output": "<img/>"
},
{
"name": "img_vbscript",
"input": "<img src='vbscript:msgbox(\"XSS\")' />",
"output": "<img/>"
},
{
"name": "input_image",
"input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
"output": "<input type='image'/>"
},
{
"name": "link_stylesheets",
"input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
"output": "&lt;link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"&gt;"
},
{
"name": "link_stylesheets_2",
"input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
"output": "&lt;link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"&gt;"
},
{
"name": "list_style_image",
"input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
"output": "<li style=''>foo</li>"
},
{
"name": "no_closing_script_tags",
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;&lt;/script&gt;"
},
{
"name": "non_alpha_non_digit",
"input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js\" xss=\"\"&gt;&lt;/script&gt;"
},
{
"name": "non_alpha_non_digit_2",
"input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
"output": "<a>foo</a>"
},
{
"name": "non_alpha_non_digit_3",
"input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
"output": "<img src='http://ha.ckers.org/xss.js'/>"
},
{
"name": "non_alpha_non_digit_II",
"input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
"output": "<a>foo</a>"
},
{
"name": "non_alpha_non_digit_III",
"input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
"output": "<a>foo</a>"
},
{
"name": "platypus",
"input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
"output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
},
{
"name": "protocol_resolution_in_script_tag",
"input": "<script src=//ha.ckers.org/.j></script>",
"output": "&lt;script src=\"//ha.ckers.org/.j\"&gt;&lt;/script&gt;"
},
{
"name": "should_allow_anchors",
"input": "<a href='foo' onclick='bar'><script>baz</script></a>",
"output": "<a href='foo'>&lt;script&gt;baz&lt;/script&gt;</a>"
},
{
"name": "should_allow_image_alt_attribute",
"input": "<img alt='foo' onclick='bar' />",
"output": "<img alt='foo'/>"
},
{
"name": "should_allow_image_height_attribute",
"input": "<img height='foo' onclick='bar' />",
"output": "<img height='foo'/>"
},
{
"name": "should_allow_image_src_attribute",
"input": "<img src='foo' onclick='bar' />",
"output": "<img src='foo'/>"
},
{
"name": "should_allow_image_width_attribute",
"input": "<img width='foo' onclick='bar' />",
"output": "<img width='foo'/>"
},
{
"name": "should_handle_blank_text",
"input": "",
"output": ""
},
{
"name": "should_handle_malformed_image_tags",
"input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
"output": "<img/>&lt;script&gt;alert(\"XSS\")&lt;/script&gt;\"&gt;"
},
{
"name": "should_handle_non_html",
"input": "abc",
"output": "abc"
},
{
"name": "should_not_fall_for_ridiculous_hack",
"input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_0",
"input": "<img src=\"javascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_1",
"input": "<img src=javascript:alert('XSS') />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_10",
"input": "<img src=\"jav&#x0A;ascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_11",
"input": "<img src=\"jav&#x0D;ascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_12",
"input": "<img src=\" &#14; javascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_13",
"input": "<img src=\"&#x20;javascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_14",
"input": "<img src=\"&#xA0;javascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_2",
"input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_3",
"input": "<img src='javascript:alert(&quot;XSS&quot;)' />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_4",
"input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_5",
"input": "<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_6",
"input": "<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041' />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_7",
"input": "<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29' />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_8",
"input": "<img src=\"jav\tascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_not_fall_for_xss_image_hack_9",
"input": "<img src=\"jav&#x09;ascript:alert('XSS');\" />",
"output": "<img/>"
},
{
"name": "should_sanitize_half_open_scripts",
"input": "<img src=\"javascript:alert('XSS')\"",
"output": ""
},
{
"name": "should_sanitize_invalid_script_tag",
"input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js\" xss=\"\"&gt;&lt;/script&gt;"
},
{
"name": "should_sanitize_script_tag_with_multiple_open_brackets",
"input": "<<script>alert(\"XSS\");//<</script>",
"output": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;"
},
{
"name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
"input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
"output": ""
},
{
"name": "should_sanitize_tag_broken_up_by_null",
"input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
"output": "&lt;scr\ufffdipt&gt;alert(\"XSS\")&lt;/scr\ufffdipt&gt;"
},
{
"name": "should_sanitize_unclosed_script",
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;&lt;/script&gt;"
},
{
"name": "should_strip_href_attribute_in_a_with_bad_protocols",
"input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
"output": "<a title='1'>boo</a>"
},
{
"name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
"input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
"output": "<a title='1'>boo</a>"
},
{
"name": "should_strip_src_attribute_in_img_with_bad_protocols",
"input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
"output": "<img title='1'/>boo"
},
{
"name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
"input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
"output": "<img title='1'/>boo"
},
{
"name": "xml_base",
"input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
"output": "<div>foo</div>"
},
{
"name": "xul",
"input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
"output": "<p style=''>fubar</p>"
},
{
"name": "quotes_in_attributes",
"input": "<img src='foo' title='\"foo\" bar' />",
"output": "<img src='foo' title='\"foo\" bar'/>"
},
{
"name": "uri_refs_in_svg_attributes",
"input": "<svg><rect fill='url(#foo)' />",
"output": "<svg><rect fill='url(#foo)'></rect></svg>"
},
{
"name": "absolute_uri_refs_in_svg_attributes",
"input": "<svg><rect fill='url(http://bad.com/) #fff' />",
"output": "<svg><rect fill=' #fff'></rect></svg>"
},
{
"name": "uri_ref_with_space_in svg_attribute",
"input": "<svg><rect fill='url(\n#foo)' />",
"output": "<svg><rect fill='url(\n#foo)'></rect></svg>"
},
{
"name": "absolute_uri_ref_with_space_in svg_attribute",
"input": "<svg><rect fill=\"url(\nhttp://bad.com/)\" />",
"output": "<svg><rect fill=' '></rect></svg>"
},
{
"name": "allow_html5_image_tag",
"input": "<image src='foo' />",
"output": "<img src='foo'/>"
},
{
"name": "style_attr_end_with_nothing",
"input": "<div style=\"color: blue\" />",
"output": "<div style='color: blue;'></div>"
},
{
"name": "style_attr_end_with_space",
"input": "<div style=\"color: blue \" />",
"output": "<div style='color: blue ;'></div>"
},
{
"name": "style_attr_end_with_semicolon",
"input": "<div style=\"color: blue;\" />",
"output": "<div style='color: blue;'></div>"
},
{
"name": "style_attr_end_with_semicolon_space",
"input": "<div style=\"color: blue; \" />",
"output": "<div style='color: blue;'></div>"
},
{
"name": "attributes_with_embedded_quotes",
"input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />",
"output": "<img src='doesntexist.jpg\"&#39;onerror=\"alert(1)'/>"
},
{
"name": "attributes_with_embedded_quotes_II",
"input": "<img src=notthere.jpg\"\"onerror=\"alert(2) />",
"output": "<img src='notthere.jpg\"\"onerror=\"alert(2)'/>"
}
]
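The JSON above is html5lib's sanitizer test data: each case pairs a named XSS attack input with the expected sanitized output. A minimal stdlib-only sketch of how a harness might load and iterate a file in this format — the inline sample and the loop below are illustrative, not the actual html5lib test runner:

```python
import json

# Illustrative sample mirroring one case from the test data above;
# the real .dat file holds the full list of cases.
sample = '''
[
  {
    "name": "background_attribute",
    "input": "<div background=\\"javascript:alert('XSS')\\"></div>",
    "output": "<div></div>"
  }
]
'''

cases = json.loads(sample)
for case in cases:
    # A real harness would feed case["input"] through the sanitizer
    # (e.g. html5lib.filters.sanitizer.Filter over a tree walker) and
    # compare the serialized result against case["output"].
    assert {"name", "input", "output"} <= set(case)
    print("%s: %d chars in, %d chars out" %
          (case["name"], len(case["input"]), len(case["output"])))
```

Note that outputs fall into two families: attributes with dangerous values are dropped from allowed elements (`<img src="javascript:…">` becomes `<img/>`), while disallowed elements such as `<script>` are escaped into text rather than removed.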
